framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,5.353141148885091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,4.89467207590739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,5.391530354817708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,31.531173706054688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,5.413893381754558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,5.454170862833659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,4.969840049743652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,31.534955342610676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,29.125274658203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,31.63537089029948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,5.455973307291667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,5.4993438720703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,31.68077341715495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,5.014431953430176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,29.186208089192707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,3.146010716756185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,31.7153803507487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,3.2224960327148438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,16.428155263264973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,2.9988905588785806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,29.248926798502605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,31.746742248535156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,2.7924960454305015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,2.815018653869629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,2.557781378428141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,15.179035186767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,16.501285552978516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,2.7999467849731445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,15.94043223063151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,14.7249387105306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,2.8261439005533853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,2.5719520250956216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,15.947765350341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,2.8149919509887695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,15.989962259928385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,14.740623474121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,2.8442986806233725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,2.5913119316101074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,15.984464009602865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,1.7135200500488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,14.757823944091797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,16.0264155069987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,8.406314849853516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,1.7600800196329753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,1.6525386174519856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,1.550831953684489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,8.449871699015299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,7.790821075439453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,1.4359572728474934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,1.5586719512939453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,8.167994817097982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,1.555770715077718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,7.557173411051433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,8.183797200520834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,1.566208044687907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,1.4434773127237956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,8.185525258382162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,1.562320073445638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,7.574399948120117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,8.191263834635416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,1.5758026440938313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,1.4503413836161296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,8.20686403910319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.1656853357950847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,7.578208287556966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,8.220853169759115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,4.565610567728679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.1659573713938396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.0910666783650715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.166863997777303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,4.566656112670898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,4.206106821695964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.1678880055745442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.0916533470153809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,4.526965459187825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.1674933433532715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,4.200592041015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,4.528271993001302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.1675199667612712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.0915359656016033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,4.532698631286621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.166933298110962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,4.204448064168294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,4.530250549316406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.1680746873219807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.0914133389790852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,4.537893295288086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,4.202591896057129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,4.537242571512858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,3.9924745559692383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,3.631194750467936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,4.0219465891520185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,18.558282216389973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,17.15601094563802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,4.006629308064778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,18.61349868774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,4.040005366007487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,3.671589215596517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,18.622992197672527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,4.033061345418294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,18.638245900472004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,4.067829449971517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,3.7028373082478843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,18.666336059570312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,18.71837870279948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,17.218597412109375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,2.355327924092611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,2.4117013613382974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,2.242565313975016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,9.783482869466146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,9.847605387369791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,2.096229394276937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,9.04750951131185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,2.110602696736654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,1.9195520083109539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,9.42733891805013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,9.4389279683431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,2.1044586499532065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,8.71894391377767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,2.1200106938680015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,1.9302666982014973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,9.445013046264648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,9.467103958129883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,2.1141600608825684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,8.726933161417643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,2.1320746739705405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,1.9449599583943684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,9.478938420613607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,9.495162963867188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.2878080209096272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,8.745951970418295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.3225493431091309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,1.2423466841379802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,5.043221473693848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,4.6802934010823565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,5.078181266784668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.1661760012308757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.1744159857432048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,1.079583962758382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,4.872687975565593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,4.878112157185872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,4.516037305196126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.1713120142618816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.179749329884847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,1.0852586428324382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,4.888511975606282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,4.890058517456055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,4.521039962768555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.176965316136678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.185754696528117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,1.0919626553853352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,4.900778770446777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,4.908080101013184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,4.532320022583008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,0.8796640237172445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,0.8797439734141032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,0.8242026964823405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,2.796581268310547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,2.7957919438680015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,2.5798187255859375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,0.8804426987965902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,0.8799733320871989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,2.7714932759602866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,0.824506680170695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,2.577690601348877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,2.7713705698649087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,0.880400021870931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,0.8800693353017172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,2.773226737976074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,0.8240479628245035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,2.7741918563842773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,2.576314608256022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,0.8806506792704264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,0.8805013497670492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,2.7785813013712564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,0.8231626351674398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,2.778789202372233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,2.5770986874898276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,3.3121973673502603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,3.3403307596842446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,3.0160853068033853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,13.370160420735678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,13.38873545328776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,12.34335962931315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,3.324847857157389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,3.3541812896728516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,3.0393174489339194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,13.405418395996094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,13.42633056640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,12.366341908772787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,3.344938596089681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,3.37605349222819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,3.064432144165039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,13.45074717203776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,13.473776499430338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,1.9596907297770183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,12.393658955891928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,2.0087626775105796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,1.8695947329203289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,7.092479705810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,7.149445215861003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,1.7447253863016765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,6.569205602010091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,1.7597707112630208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,6.799194971720378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,1.600554625193278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,6.817248026529948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,6.2934722900390625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,1.7538720766703289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,1.768112023671468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,1.6106346448262532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,6.829247792561849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,6.835194905598958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,6.30404790242513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,1.7609814008076985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,1.778821309407552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,1.6229227383931477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,6.8484853108723955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,6.860442479451497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,6.316111882527669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,1.0757493178049724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.1039199829101562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,1.0386133193969727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,3.6765174865722656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,3.418389320373535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,3.702005386352539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,0.9763200283050537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,0.9818613529205322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,0.9026186466217041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,3.534794807434082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,3.279184023539225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,3.5437758763631186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,0.9784533182779948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,0.9859253565470377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,0.9083519776662191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,3.5470507939656577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,3.283797264099121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,3.553173383076986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,0.9834187030792236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,0.9913973013559977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,0.9145440260569254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,3.5563198725382485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,3.290719985961914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,3.5664587020874023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,0.7370026906331381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,0.7364373207092285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,2.0670666694641113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,0.6895786921183268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,2.0675412813822427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,1.9079893430074055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,0.736565351486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,0.7363359928131104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,2.0483946800231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,0.6899147033691406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,2.051413377126058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,1.9084213574727376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,0.7374933560689291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,0.7380320231119791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,2.0519307454427085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,0.6900746822357178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,2.052058696746826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,1.9063040415445964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,0.7369333108266195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,0.7380053202311198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,2.0534613927205405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,0.6902240117390951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,2.054543972015381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,1.9083040555318196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,5.198591868082683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,5.2366078694661455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,4.737845420837402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,17.864432017008465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,16.484347025553387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,17.892859141031902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,5.264938672383626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,5.303066571553548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,4.824133237202962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,17.95702870686849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,16.557706197102863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,18.01805369059245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,5.298314730326335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,5.3384749094645185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,4.857407887776692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,18.044335683186848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,16.607205708821613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,18.07054392496745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,3.0028692881266275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,3.0691200892130532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,9.460831960042318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,2.853642781575521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,9.528122584025065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,8.761407852172852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,2.6458826065063477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,2.6690667470296225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,2.4126879374186196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,9.006698608398438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,9.021461486816406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,8.308479944864908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,2.656874656677246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,2.681701342264811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,9.036816279093424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,2.4306880633036294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,8.324005126953125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,9.059743881225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,2.672074635823568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,2.697381337483724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,2.44922669728597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,9.070512135823568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,9.08896509806315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,1.56766939163208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,4.822325388590495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,1.6068639755249023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,1.4977493286132812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,4.86351998647054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,4.4754025141398115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,1.3980587323506672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,4.602202733357747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,1.4105653762817383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,1.28438401222229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,4.613823890686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,4.2543519337972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,1.4026133219401042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,4.616223971048991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,1.4169813791910808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,1.2922879854838054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,4.629151980082194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,4.263274510701497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,1.4095093409220378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,4.631205240885417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,1.4226826032002766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,1.300554672876994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,4.645562807718913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,4.27286942799886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,0.8653226693471273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,2.5165759722391763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,0.8878613313039144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,0.8338720003763834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,2.5402132670084634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,2.347071965535482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,0.7834239800771078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,2.410597324371338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,0.7892853418986002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,0.7261280218760172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,2.416266600290934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,2.235930601755778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,0.7882880369822184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,2.420639991760254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,0.7931946913401285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,0.730410655339559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,2.425322691599528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,2.240586598714193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,0.789680004119873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,2.4262879689534507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,0.7963733673095703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,0.7345653374989828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,2.4342293739318848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,2.244314670562744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.5931893189748129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,1.4446080525716145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.5946400165557861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.5561066468556722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,1.44485870997111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,1.3358240127563477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.5935999949773153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,1.4311572710673015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.594869335492452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.5561600128809611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,1.4316959381103516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,1.3334933916727703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.5930453141530355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,1.434127966562907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.5944213469823202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.5566346645355225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,1.4347893397013347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,1.3348906834920247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.5933813254038492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,1.435813268025716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.5940586725870768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.555733323097229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,1.4373760223388672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,1.3351893424987793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,3.860842704772949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,10.812943776448568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,3.893141428629557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,3.5088745752970376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,10.846735636393229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,9.957520167032877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,3.881797472635905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,10.859408060709635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,3.915482521057129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,3.54963747660319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,10.89199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,9.99889055887858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,3.904911994934082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,10.904436747233072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,3.938277244567871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,3.5806986490885415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,10.939802805582682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,10.030138651529947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,2.2451732953389487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,5.804309209187825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,2.2950453758239746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,2.132143974304199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,5.855093638102214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,5.384218851725261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,1.9813547134399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,5.478000005086263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,1.99837859471639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,1.810373306274414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,5.49349848429362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,5.052048047383626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,1.990682601928711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,5.498565038045247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,2.0083306630452475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,1.8228212992350261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,5.51686414082845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,5.0655520757039385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,2.002474625905355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,5.523173650105794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,15.923199971516928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,2.022117296854655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,1.8347999254862468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,5.5420481363932295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,5.073957443237305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.1791786352793376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.2080480257670085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,2.97710386912028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,1.1280372937520344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,3.0066134134928384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,2.7713438669840493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,1.051632006963094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,1.0600372950236003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,2.8193333943684897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,0.966111977895101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,2.6047040621439614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,2.8282346725463867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,1.0561227003733318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,1.0658133029937744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,2.831146558125814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,0.9737652937571207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,2.8407414754231772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,2.6092000007629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,1.0623359680175781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,1.0723946889241536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,2.8437439600626626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,0.9828586578369141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,2.8545173009236655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,2.6212426821390786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.6520586808522543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.6767679850260416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,1.5718506177266438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,0.6293066740036011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,1.5879947344462078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,1.466261386871338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.5913706620534261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.5951253175735474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,1.494965394337972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,0.5493599971135458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,1.386090596516927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,1.5001386006673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.5940106709798177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.5989866654078165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,1.5003199577331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,0.5519040028254191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,1.5056053797403972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,1.3901492754618328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.5981546640396118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.6022613445917765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,1.5062932968139648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,0.5562026500701904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,1.510719935099284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,1.3918293317159016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.45076799392700195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,0.927728017171224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.45105600357055664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.4209333260854085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,0.9276373386383057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,0.8636213143666586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.4490293264389038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.44916268189748126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,0.9203200340270996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.4207306702931722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,0.9195520083109537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,0.8583093484242758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.4511306683222453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.449893315633138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,0.9207786719004313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.4226773182551066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,0.9206666946411133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,0.8579146862030029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.45074133078257245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.44999468326568604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,0.9225172996520996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.4211466709772746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,0.9250720342000326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,0.8574666976928711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,5.096090634663899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,5.133322715759277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,10.931077321370443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,4.646517435709636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,10.970389048258463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,5.158725420633952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,10.063349405924479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,5.197519938151042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,11.022565205891928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,4.7278134028116865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,11.060340881347656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,10.148143768310547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,5.197461446126302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,5.237866719563802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,11.070133209228516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,4.766928037007649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,11.108389536539713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,10.193594614664713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,2.917242685953776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,2.973679860432943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,5.940127690633138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,2.7679945627848306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,5.99943478902181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,5.50216547648112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,2.5597227414449057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,2.582138697306315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,5.487333297729492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,2.331109364827474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,17.198293050130207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,5.509509404500325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,5.055295944213867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,2.5707947413126626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,2.5985867182413735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,2.3472159703572593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,5.505807876586914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,2.585872014363607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,5.539845148722331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,5.068938573201497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,2.6130293210347495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,2.367664019266764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,5.541615804036458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,1.495408058166504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,5.565135955810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,5.089215914408366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,3.0201120376586914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,1.4260692596435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,1.5319306055704753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,1.325434684753418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,3.054938634236654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,2.8040587107340493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,2.8035945892333984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,1.3615733782450359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,2.5783467292785645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,2.814864158630371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,1.3417439460754395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,1.3426559766133626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,2.8139146169026694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,1.2177333037058513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,2.826181411743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,2.591279983520508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,1.3387519518534343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,1.3521332740783691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,2.820495923360189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,1.2303413550059001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,2.8407360712687173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,2.6035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,0.7918399969736735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,1.5634133021036785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,0.818453311920166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,0.758949359258016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,1.5836106936136882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,1.4608480135599773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,0.7074027061462402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,0.7116266886393229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,1.4593706130981445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,0.6501439809799194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,1.4637707074483235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,1.3492266337076824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,0.7095466454823812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,0.7160747051239014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,1.4628532727559407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,0.65611732006073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,1.4706613222757976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,1.3555679321289062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,0.7136906782786051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,0.7213546435038248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,1.4711893399556477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,0.6600159804026285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,1.478117307027181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,1.3588852882385254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.4414986769358317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,0.8405173619588217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.4524746735890706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.42681066195170086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,0.8506613572438558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,0.8053759733835856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.39946667353312176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.4023146629333496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,0.7843466599782308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.3714933395385742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,0.7900319894154867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,0.7325440247853597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.4018666744232178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,0.7913706302642822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.40511465072631836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.38179731369018555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,0.7934026718139648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,0.7340693473815918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.41155731678009033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,0.7941813468933105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.40648531913757324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.37677868207295734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,0.8142613569895426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,0.7369706630706787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.30744000275929767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.5171146790186564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.3084426720937093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.2899786631266276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.5174880027770996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.48587199052174884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.30617600679397583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.5093813339869181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.3064639965693156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.2857973376909892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.5105653206507365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.47844799359639484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.3051146666208903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.51201065381368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.30513066053390503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.2877226670583089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.5129013458887736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.4855360190073649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.3065173427263896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.5161173343658447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.3068480094273885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.28779200712839764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.5113333463668823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.47787201404571533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,3.7798026402791343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,6.875786463419597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,3.8105386098225913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,3.4422079722086587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,6.905066808064778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,6.323471705118815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,3.807018597920736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,6.911317189534505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,3.8391946156819663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,3.481893221537272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,6.940298716227214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,6.365327835083008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,3.828240076700846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,3.862127939860026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,6.944826761881511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,3.508570671081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,6.978986740112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,6.393237431844075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,2.182725270589193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,2.2291733423868814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,3.7818078994750977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,2.072005271911621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,3.8267733256022134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,3.5285867055257163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,1.9204853375752766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,1.9364213943481445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,3.4741172790527344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,1.7494667371114094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,3.4917386372884116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,3.2007786432902017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,1.9293707211812336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,3.491135915120443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,1.9475520451863606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,1.7633280754089355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,3.5070667266845703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,3.2122186024983725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,1.9404106140136719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,1.9595252672831218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,3.506704012552897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,1.7775626182556152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,3.52620792388916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,3.2272958755493164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.1270026365915935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.1525013446807861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,1.9328907330830891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,1.0731253623962402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,1.9598080317179363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,1.8104480107625325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,0.9958986441294352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,1.0050559838612874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,1.7834879557291667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,0.9125493367513021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,1.7925386428833008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,1.6458880106608074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,1.0003360112508137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,1.0106666882832844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,1.7922453880310059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,0.9190186659495035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,1.8010187149047852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,1.6523359616597493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,1.0066346327463787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,1.0165599981943767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,1.7990454037984211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,0.9255146980285645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,1.8084853490193684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,1.659023920694987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.5974133412043253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,1.0104906558990479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.6136639912923177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,0.5736533403396606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,1.0250026384989421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,0.9491999944051107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.5330719947814941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,0.9364746411641439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,0.4931519826253255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.5387786626815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,0.9407626787821451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,0.8675359884897867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.5372373263041178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,0.9380746682484945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.5418186585108439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,0.4959893226623535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,0.9436906973520914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,0.8714026610056559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.5383146603902181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.5441439946492513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,0.943610668182373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,0.500437339146932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,0.950160026550293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,0.8750186761220297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.33692800998687744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.549946665763855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.3452693223953247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.3237706621487935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.56004798412323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.5209920008977255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.3015893300374349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.3041920065879822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.5109813213348389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.28357332944869995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.5120853185653687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.4889119863510132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.3027893304824829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.3061013420422872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.5129973491032919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.28519999980926514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.5150719881057739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.480954647064209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.30588799715042114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.5151679913202921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.30833067496617633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.28730666637420654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.5202133258183798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.48401065667470294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.23458667596181235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.3511413335800171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.23562665780385336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.2189226746559143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.3524479866027832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.32528533538182575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.23042132457097372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.23218133052190146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.21996267636617026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.3471999963124593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.3265119989713033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.23213867346445718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.3473546504974365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.23333332935969034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.21911466121673584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.3466613292694092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.3250346581141154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.23314666748046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.34704001744588214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.2323039968808492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.21863466501235962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.3476639986038208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.3263466755549113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,5.044543902079265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,7.44761594136556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,5.074048042297363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,4.57150936126709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,7.474602381388347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,6.826154708862305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,5.154677391052246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,7.56654421488444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,5.144229253133138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,4.628554662068685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,7.553152084350586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,6.886351903279622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,5.173791885375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,7.58892822265625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,5.179525375366211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,4.669685363769531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,7.596282958984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,6.927408218383789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,2.8721386591593423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,4.128191947937012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,2.9239892959594727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,2.7231998443603516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,4.17794672648112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,3.868570645650228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,2.510874589284261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,3.7186028162638345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,2.5338560740152993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,2.2828426361083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,3.740463892618815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,3.4156214396158853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,2.52729066212972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,3.738767941792806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,2.550597349802653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,2.300133387247721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,3.761514663696289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,3.4356425603230796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,2.5434133211771646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,3.7584425608317056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,2.566762606302897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,2.3187999725341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,3.781680107116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,3.4543145497639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,1.4597439765930176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,2.093290646870931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,1.4892959594726562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,1.3828479448954265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,2.123407999674479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,1.9616907437642415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,1.2834880352020264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,1.8924427032470703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.2949013710021973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,1.1689813137054443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,1.9035733540852864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,1.7395893732706706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,1.290719985961914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,1.901642640431722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,1.303125301996867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,1.1777119636535645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,1.9133920669555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,1.750266710917155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,1.2996266682942708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,1.9115039507548015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,1.3110346794128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,1.1870240370432537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,1.9244747161865234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,1.7593599955240886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,0.7547146479288737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,1.078661362330119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,0.7736426989237467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,0.7205333709716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,1.0957706769307454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,1.0141599973042805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,0.6679946581522623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,0.9801119963328043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,0.6758293310801188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,0.6133866707483927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,0.9846239884694418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,0.904741366704305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,0.6725653012593588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,0.9839146931966146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,0.6793066660563151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,0.6190186738967896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,1.0085279941558838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,0.906602700551351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,0.6764372984568278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,0.9886879920959473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,0.6828587055206299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,0.6232746839523315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,0.9961012999216715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,0.9136586983998617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.40331733226776123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.5705440044403076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.41460267702738446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.38869333267211914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.5823946793874105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.5401333173116049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.3590453465779622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.5197333494822184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.3616960048675537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.33454398314158124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.5213493506113688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.48470401763916016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.3614666859308879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.5235999822616577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.3654880126317342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.33624001344045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.5247840086619059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.48815464973449707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.36347734928131104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.5251253445943197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.3683893283208211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.3387519915898641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.528437336285909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.48928534984588623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.23034133513768515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.3189280033111572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.23646400372187296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.2241493264834086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.3248533407847087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.30397866169611615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.20196266969045004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.2882240017255147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.2052639921506246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.19131733973821005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.2898826599121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.27265600363413495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.2036479910214742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.28990399837493896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.20756266514460245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,8.347173055013021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.19564266999562582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.29173866907755536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.277130663394928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.2058080037434896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.29154133796691895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.2097546656926473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.19854400555292764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.2943626642227173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.2781493266423543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.16268799702326456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.16276266177495322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.2139093279838562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.15428800384203592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.21382933855056763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.1994453271230062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.1604320009549459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.2099626660346985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.15200000007947287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.2101759910583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.19558932383855185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.16037333011627197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.2092746694882711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.16259732842445374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.15264532963434854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.20982933044433594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.1973173419634501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.1604586640993754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.2097439964612325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.1609493295351664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.15110400319099426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.20899200439453125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.19553599754969278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,3.735370635986328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,4.907125473022461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,3.7642507553100586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,3.392165184020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,4.93612798055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,4.5024159749348955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,3.7753012975056968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,4.950463930765788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,3.798133214314779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,3.4325278600056968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,4.973189353942871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,4.537749290466309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,3.7970078786214194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,4.97713057200114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,3.8209441502889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,3.454613367716471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,5.000432014465332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,4.565936088562012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,2.162597338358561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,2.769983927408854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,2.1884640057881675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,2.032357374827067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,2.812533378601074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,2.599402745564779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,1.8790079752604167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,2.466383934020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,1.8935306866963704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,1.7076746622721355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,2.48251740137736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,2.266730626424154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,1.8893013000488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,2.480133374532064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,1.9065546989440918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,1.7238346735636394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,2.498576005299886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,2.2813119888305664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,1.9006080627441406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,2.493626594543457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,1.91976531346639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,1.7370400428771973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,2.5114879608154297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,2.2939252853393555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.0946666399637859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,1.4103412628173828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.1256799697875977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,1.0410772959391277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,1.4320906003316243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.3250719706217449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,0.96234663327535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.2623519897460938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,0.9713013172149658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,0.8783146540323893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.2697386741638184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,1.160864035288493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,0.9684159755706787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.2675786813100178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,0.9780960083007812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,0.8855306307474772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.2774933179219563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.168394645055135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,0.9738506476084391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.2738453547159831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,0.984106699625651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,0.8920053641001383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.2853013674418132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.1747946739196777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.5691200097401937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,0.7331733703613281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.5850826501846313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,0.5442293485005697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,0.746239980061849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,0.6919253667195638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.5042879978815714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,0.6567360162734985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.5071733395258585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,0.4637920061747233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.6621706485748291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.6093600193659464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.5072960058848063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,0.659605344136556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.5104639927546183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,0.46716264883677167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,0.666586677233378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.6129066546758016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.510752002398173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,0.6644746859868368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.5148053169250488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,0.47020800908406574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,0.6691253185272217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.6152906815210978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.30795733133951825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.39234666029612225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.31485867500305176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.29578665892283124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.40110401312510174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.37305064996083576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.26950399080912274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.35049601395924884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.27242666482925415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.2542240023612976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.3531200091044108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.3312159975369771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.27183467149734497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.3521226644515991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.2755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.256933331489563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.35575465361277264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.33316800991694134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.2741333246231079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.35524264971415204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.2776693304379781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.25909332434336346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.35862934589385986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.33560534318288165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.17684266964594522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.22320000330607095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.18085867166519165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.170799990495046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.22860799233118692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.2139306664466858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.15411200126012167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.19885865847269693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.14416533708572388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.20027732849121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.18498132626215616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.1530506710211436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.19773866732915243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.15498666961987814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.1441386640071869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.20090667406717935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.18738667170206705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.15574933091799417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.19965332746505737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.15653333067893982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.14710399508476257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.2015413244565328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.18940800428390503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.12405866384506226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.150629331668218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.12353066603342693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.11761066317558289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.15034133195877075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.1444000005722046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.12351466218630473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.1500693360964457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.11558933059374492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.1407786707083384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.12310933073361714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.14842133720715842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.12388267119725545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.11585066715876262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.1483626663684845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.14045332868893942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.12277332941691081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.15041066209475198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.121888001759847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.11564800143241882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.14841066797574362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.1402453382809957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,4.526570638020833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,5.252400080362956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,4.507583936055501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,4.2672373453776045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,5.24237855275472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,4.993818600972493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,4.634117444356282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,5.364661534627278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,4.610090573628743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,4.57806936899821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,5.363706588745117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,5.295626640319824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,4.679189364115397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,5.435946782430013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,4.666346549987793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,4.593935966491699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,5.431221644083659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,5.297487894694011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,2.541696071624756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,2.9607413609822593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,2.4884692827860513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,2.4573973019917807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,2.9000746409098306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,2.8252585728963218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,2.260144074757894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,2.632912000020345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,2.2574987411499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,2.1420106887817383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,2.627280076344808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,2.4909119606018066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,2.272432009379069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,2.6493493715922036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,2.273850599924723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,2.260752042134603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,2.6455413500467935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,2.6292746861775718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,2.2747839291890464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,2.6704959869384766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,2.270400047302246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,2.263589382171631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,2.648746649424235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,2.6349546114603677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,1.2301013469696045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,1.4344639778137207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,1.2101706663767497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,1.2215946515401204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,1.4110399881998699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,1.4108586311340332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.1417280038197835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,1.3315573533376057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.1366613705952961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,1.0481226444244385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,1.3275787035624187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.2315893173217773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.1460586388905842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,1.3367199897766113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.1456960042317708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,1.107472022374471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,1.338037331899007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.280234654744466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.1472427050272624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,1.3361867268880208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.1431519985198975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,1.0837013721466064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,1.3334773381551106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.2731359799702961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,0.6238613526026408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,0.7306079864501953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,0.6121386686960856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,0.6226880153020223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,0.7183679739634196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,0.7132319609324137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.5788799921671549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,0.6751200358072916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.5800319910049438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,0.5390986601511637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,0.6776639620463053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.6249226729075114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.5825493335723877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,0.6803786754608154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.5830080111821493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,0.5514293511708578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,0.6813333034515381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.6378399928410848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.5842719872792562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,0.6869973341623942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.5851306517918905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,0.5475733280181885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,0.6813759803771973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.6403359969456991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.32561065753300983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.3813973267873128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.32473599910736084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.3238933285077413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.3764053185780843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.3698933521906535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.30213866631189984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.3528053363164266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.301258663336436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.27659199635187787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.3528906504313151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.3272639910380046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.304202675819397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.352783997853597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.30220266183217365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.2845333417256673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.3531999985376994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.3319466710090637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.30242133140563965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.35288000106811523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.3033813238143921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.283733328183492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.35445332527160645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.3333546717961629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.17482666174570718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.2043786644935608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.1715679963429769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.17069333791732788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.20032533009847006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.15825066963831583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.18450667460759482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.15787200133005777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.14829333623250326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.18636266390482584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.17592533429463705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.16037866473197937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.1871946652730306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.15879467129707336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.15070399641990662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.1864373286565145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.1776906649271647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.15969066818555197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.18658133347829184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.15982932845751444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.15241066614786783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.18700265884399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.17917333046595255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.09902399778366089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.11570133765538533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.09855467081069946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.11505066355069478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.11515733599662781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.08841066559155782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.08159466584523518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.10685333609580994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.09789866209030151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.09010133147239685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.10668800274531047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.08870933453241985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.08310399949550629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.1065120001633962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.09898666540781657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.08847467104593913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.10557333628336589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.08866133292516072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.10654933253924052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.09919466574986775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.06694933275381725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.0558240016301473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.05517866710821787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.06453866759936015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.06484266618887584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.05388266841570536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.06481599807739258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.06131199995676676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.050255998969078064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.06400533517201741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.06474666794141133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.05435200035572052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.060640002290407814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,4.388448079427083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,4.410794576009114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,4.383850733439128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,4.135114669799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,4.411535898844401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,4.183333396911621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,4.429162661234538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,4.482303937276204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,1.210927963256836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,4.422645250956218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,4.458640098571777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,4.44595209757487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,4.511818567911784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,4.521120071411133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,4.571776072184245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,4.490928014119466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,4.456714630126953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,4.559712092081706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,2.47436793645223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,4.509530703226726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,2.467696030934652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,2.4341813723246255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,2.379466692606608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,2.4690720240275064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,2.41867733001709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,2.198592027028402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,2.2052319844563804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,2.1994880040486655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,2.069701353708903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,2.2061972618103027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,2.090538660685221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,2.2080000241597495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,2.2222773234049478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,2.2097546259562173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,2.2008585929870605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,2.222426732381185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,2.2242453893025718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,2.2138400077819824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,2.2350452740987143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,2.2023520469665527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,2.207461357116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,2.215541362762451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,1.2014453411102295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,2.2322986920674643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.2118666966756184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.1909333070119221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,1.185477336247762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.208026647567749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.1987520058949788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.10807998975118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.113759994506836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.105247974395752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,1.0214186509450276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.1131786505381267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,1.036245346069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.1111360390981038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.121232032775879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.1098346710205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,1.091765324274699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.119109312693278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,1.09716796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.1120533148447673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.1233386993408203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.1076800028483074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,1.0709599653879802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.1156906286875408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,1.0761706829071045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,0.6073386669158936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.6172533432642618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.5965280135472616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,0.5985920031865438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.6068480014801025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.6102399826049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.5637706518173218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.5679999987284342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.5659146706263224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,0.5213386615117391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.5681706666946411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.5468693176905314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.5662773450215658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.5706506570180258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.5670400063196818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,0.5305279890696207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.5688960154851278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.5365013281504313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.5669013261795044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.5719306468963623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.5684053500493368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,0.5332266489664713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.5708106756210327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.5383733510971069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.3161440094312032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.32386666536331177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.31058667103449505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.3122719923655192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.3168586691220601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.31596799691518146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.29386667410532635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.2956479986508687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.29337600866953534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.27237866322199505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.296725332736969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.2751520077387492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.2935360074043274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.29603199164072674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.29419734080632526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.2749493320782979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.29601067304611206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.27830400069554645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.2939466635386149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.297050674756368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.2953973412513733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.2777493397394816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.2971893350283305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.28011200825373334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.16983999808629355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.17384533087412515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.16710400581359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.16743467251459757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.1693333387374878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.16922666629155478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.15565866231918335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.1564479966958364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.1548373301823934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.1439786652723948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.15610667069753012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.15585066874821982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.15733333428700766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.15434666474660239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.1458506683508555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.1566933294137319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.14969600240389505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.1564640005429586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.15870400269826254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.15495466192563376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.14689600467681885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.15783466895421347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.15057599544525146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.09559999903043111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.09745066364606221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.09576533238093059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.09896533687909444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.08762133121490479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.08826133608818054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.08803733189900716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.08814932902654012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.08161599934101105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.08807466427485149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.08713066577911377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.08844266335169475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.08054399987061818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.08758933345476787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.0824480007092158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.08768000205357869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.0888266662756602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.08260799944400787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.08943466345469157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.08516800403594971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.057317331433296204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.05526400109132131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.05359466870625814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.05340800185998281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.04966933528582255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.053114667534828186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.054378668467203774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.052501335740089417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.05306666592756907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.05342400074005127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.05129600067933401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.03639466563860575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.03644266724586487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,2.0487893422444663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,2.002629280090332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,1.9038507143656414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,2.0423572858174643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,1.853226661682129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,1.9988212585449219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,2.043935934702555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,1.9979999860127766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,2.038501262664795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,2.0401973724365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,1.995888074239095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,2.004410743713379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,2.0681279500325522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,2.046565373738607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,2.040309270222982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,2.0499839782714844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,2.008101304372152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.1176746686299641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,2.0030080477396646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.09989333152771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.1108853022257488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,1.1159733136494954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.0905439853668213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,1.0888906319936116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,1.0317280292510986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,1.009989341100057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,1.0295573075612385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,0.9442453384399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,1.0071360270182292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,0.9232107003529867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,1.0295093059539795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,1.0096426804860432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,1.0301813284556072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,0.9997813701629639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,1.0088213284810383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,0.980400005976359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,1.0347306728363037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,1.0134560267130535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,1.030682643254598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,0.9852746327718099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,1.0087040265401204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,0.9692959785461426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.5657013257344564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.557429313659668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.5573439995447794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,0.565226674079895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.5464479923248291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.5538453261057535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.5248426596323649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.5135733286539713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.5225119988123575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.4829440116882324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.512666662534078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.4697599808375041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.524453322092692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.5135413408279419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.5251040061314901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,0.49582934379577637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.5139413277308146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.4836213191350301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.5274346669514974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.5157440106074015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.5253333250681559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,0.49647998809814453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.5152586698532104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.48527467250823975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.2964906692504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.29154666264851886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.2904426654179891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.2929760018984477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.2848479946454366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.2888159950574239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.27425066630045575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.269322673479716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.27315733830134076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.25036799907684326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.26664533217748004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.24611733357111612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.27290666103363037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.2675360043843587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.2727786699930827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.2573973337809245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.268069326877594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.2500213384628296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.27487999200820923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.2697226603825887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.27347733577092487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.25598933299382526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.2681279977162679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.15947733322779337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.25148799022038776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.15557333827018738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.15665599703788757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.1590079963207245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.15453867117563883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.15708800156911215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.1455946664015452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.14308800299962363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.14627733826637268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.1360586682955424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.1425920029481252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.13375999530156454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.14683199922243753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.14230400323867798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.14604266484578451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.13642133275667825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.1437173287073771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.13432533542315164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.14640000462532043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.14261333147684732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.14680533607800803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.13899200161298117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.14331199725468954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.13643200198809305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.09175466497739156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.09047999978065491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.09110933542251587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.0934826632340749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.08988266189893086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.08453333377838135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.08316266536712646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.07696533203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.08291733264923096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.07629866898059845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.08359466989835103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.08101333181063335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.08332266906897227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.07794133325417836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.082805335521698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.0832426647345225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.08284266789754231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.07546666761239369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.05242133140563965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.05147733290990194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.04832000037034353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.04952533543109894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.04924799998601278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.04891733328501383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.049029335379600525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.0491839994986852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.04740266501903534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.033941333492596946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.032298666735490165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.03421333432197571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.032816000282764435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.03212266663710276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.34515734513600665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,1.0761280059814453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,1.079360008239746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,1.0735519727071126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,1.0199519793192546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,1.0753813584645588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,1.0160799821217854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,1.0796159903208415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,1.0727360248565674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,1.0974133014678955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,1.0747360388437908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,1.0717919667561848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,1.1001386642456055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,1.0813066959381104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,1.0823307037353516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,1.0820000171661377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,1.1045386791229248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,1.0806132952372234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,1.1037279764811199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.5935519933700562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.5948533217112223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.581823984781901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.5985920031865438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.5802826484044393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.5971680084864298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.5439306497573853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.5477120081583658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.5445226828257242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.5161226590474447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.542954683303833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.5134559869766235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.5450773239135742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.5482666492462158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.5426346858342489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.5302026669184366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.54366401831309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.5309973160425822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.5498079856236776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.5510346492131551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.5460746685663859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.5313546657562256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.5463786522547404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.5320959885915121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.3062826593716939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.30851199229558307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.3004586696624756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.30871466795603436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.30135466655095416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.30806400378545123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.28285332520802814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.2825760046641032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.2810399929682414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.26608532667160034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.28174932797749835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.2659253279368083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.28096532821655273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.2818186680475871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.28005866209665936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.2727359930674235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.28031466404596966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.27271467447280884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.28336000442504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.28360533714294434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.281333327293396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.27427200476328534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.28173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.27395200729370117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.16124799847602844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.16235199570655823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.15866133570671082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.1648960014184316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.15969066818555197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.16529599825541177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.14869866768519083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.14869866768519083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.1492586632569631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.14017066359519958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.14938132961591086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.14154666662216187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.14797332882881165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.14882666865984598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.14825066924095154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.14446933070818582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.1474560002485911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.14432533582051596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.14871999621391296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.15001066525777182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.14998400211334229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.14470400412877402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.15034133195877075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.1456160048643748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.09128000338872273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.09268800417582194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.08964799841245015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.09655466675758362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.09552533427874248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.08401599526405334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.0836906631787618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.08365866541862488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.07948266466458638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08299200236797333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.07858133316040039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.083146666487058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.08467732866605122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.08016000191370647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.0844586690266927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.0804746647675832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.08308800061543782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08331733445326488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.08483200271924336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.08014933268229167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08308266599973042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.07931200166543324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.05067199965318044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.049584001302719116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.04738666613896688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.05057600140571594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.04833599925041199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.050213331977526345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.04911466439565023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.05012799799442291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.050293331344922386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.050245334704717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.05045333504676819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.032842665910720825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.031925333042939506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.024698667228221893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.024677333732446034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.7617440223693848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.7636799812316895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.7614453633626302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.7118079662322998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.7619093259175619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.7126826445261637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.7638346354166666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.7616639931996664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.7597493330637614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.7232000033060709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.7603519757588705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.722879966100057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.7645280361175537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.763983964920044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.7613653341929117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.7239253520965576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.7625546455383301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.7270932992299398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.4108426570892334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.4118613402048747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.4069013198216756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.40221865971883136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.4073066711425781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.4039626518885295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.38869865735371906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.3893333276112874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.38955732186635333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.3628693421681722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.38810133934020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.3625653187433879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.3886186679204305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.38922667503356934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.3875253200531006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.36899733543395996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.38805333773295086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.36818134784698486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.38923199971516925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.389296015103658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.38810133934020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.37090134620666504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.38728535175323486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.37045331796010333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.21460266908009848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.21386667092641196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.21274133523305258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.2120586633682251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.21179733673731485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.21056532859802246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.2024959921836853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.20241065820058188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.20268267393112183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.18849066893259683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.2021440068880717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.18812266985575357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.20297066370646158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.2019253373146057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.2023626764615377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.1916960080464681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.20148267348607382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.19130667050679526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.201855997244517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.20191466808319092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.20321067174275717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.19241599241892496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.20206934213638306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.19197867314020792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.11760532855987549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.11754666765530904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.11633066336313884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.11830400427182515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.11534399787584941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.11661866307258606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.10923199852307637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.10917333761850993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.1033066709836324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.10971732934315999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.1020906666914622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.10946133732795715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.10871466994285583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.10942932963371277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.10282666484514873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.10817066828409831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.10150933265686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.10929600397745769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.10935466488202412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.10296533505121867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.10505066315333049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.06482133269309998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.06505600114663442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.06454400221506755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.06484800080458324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.065610667069753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.06285866598288219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.06291733185450236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.06313600142796834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.0624533345301946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.06268799801667531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.06381866832574208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.06270933151245117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.060080001751581825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.06284800171852112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.060032000144322716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.0402453343073527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.03844266633192698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.03920533259709676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.038618666430314384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.038986665507157646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.03878399978081385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.02697066714366277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.605130672454834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.6075573364893595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.6091466744740804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.557642658551534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.6046026547749838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.5562880039215088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.6042346556981405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.603434681892395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.60426131884257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.5627040068308512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.6037653287251791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.562181313832601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.6054773330688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.604362686475118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.6037066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.5647840102513632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.6033600171407064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.5629173517227173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.32105066378911334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.32107200225194293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.320250670115153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.30988266070683795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.319760004679362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.3089333375295003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.30802667140960693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.3100000023841858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.3085493246714274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.285589337348938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.309279998143514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.2861706614494324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.3091306686401367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.30921600262324017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.30993600686391193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.28761066993077594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.31037867069244385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.2874293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.309173325697581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.3085493246714274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.31038933992385864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.2892480095227559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.30825599034627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.28852800528208417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.16914665699005127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.16942399740219116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.1696959932645162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.16503999630610147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.1681333382924398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.1649066706498464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.16289066274960837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.16286933422088623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.16261866688728333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.15030399958292642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.16204800208409628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.14979199568430582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.16211199760437012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.16194666425387064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.1630293329556783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.15067733327547708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.16284799575805664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.15064533551534018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.16198399662971497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.16189866264661154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.1629706621170044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.15105600158373514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.16234667102495828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.15053866306940714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.09058133761088054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.08877866466840108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.0902880032857259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.0885813335577647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.08776533603668213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.08769599596659343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.08281599978605907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.08301333089669545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.08716266353925069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.08931733171145122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.08947733044624329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.08548800150553386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.08501332998275757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.08913600444793701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.08823999762535095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.08804800113042195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.08323733508586884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.08913066983222961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.05357866485913595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.052186667919158936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.05179200073083242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.052239999175071716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.05151999990145365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.04965866605440775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.05212800204753876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.033861334125200905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.033146666983763375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.022831998765468597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.020501332978407543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.5215466817220052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.5220746596654257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.5237386624018351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.4814079999923706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.5236693223317465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.5052479902903239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.5215413173039755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.5224586725234985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.5200693209966024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.482805331548055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.5214399894078573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.48374934991200763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.5201119979222616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.5224693218866984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.5211840073267618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.48399468262990314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.5231466690699259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.4838346640268962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.2752266724904378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.27530133724212646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.27535466353098553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.26265599330266315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.27425066630045575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.26176534096399945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.26815466086069745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.267247994740804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.2691413362820943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.24754667282104492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.26816000541051227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.24660267432530722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.2691839933395386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.2680480082829793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.26922667026519775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.24836266040802002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.2683093349138896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.24701333045959473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.2688213388125102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.2676746646563212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.2690453330675761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.24870399634043375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.26979732513427734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.24732800324757895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.1421386698881785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.1422719955444336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.1442346672217051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.13833600282669067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.1420799990495046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.1362933317820231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.1402666668097178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.14019200205802917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.14087999860445657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.13220799962679544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.1404159963130951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.13212800025939941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.1400320033232371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.1404213309288025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.13191466530164084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.14045866330464682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.13193066914876303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.14035733540852866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.13988266388575235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.1402506629625956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.13209066788355509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.14037866393725076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.13307199875513712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.07879466811815898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.1606986622015635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.07702933251857758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.07852266728878021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.07868266602357228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.07454933226108551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.0788373351097107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.07338133454322815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.078575998544693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.07874666651089986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.07890666524569194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.07371733089288075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.0746506651242574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.0794293334086736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.07493333518505096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.0744053324063619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.07949866851170857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.048197334011395775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.048351998130480446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.04609066744645437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.04428266485532125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,3.4672587712605796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,20.950341542561848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,3.4983040491739907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,3.2446721394856772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,20.98150380452474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,19.43661880493164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,3.509920120239258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,20.99720509847005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,3.535759925842285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,3.287541389465332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,21.01965840657552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,19.478256225585938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,3.5292161305745444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,21.03233591715495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,3.555845260620117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,3.3122345606486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,21.062586466471355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,19.50184504191081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,3.560490608215332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,21.09774398803711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,3.5901174545288086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,3.3504638671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,21.127919514973957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,19.542789459228516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,2.0606133143107095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,10.949834187825521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,2.107013384501139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,1.998762607574463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,10.996959686279297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,10.151093165079752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,1.815941333770752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,10.604298909505209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,1.8319199879964192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,1.6992106437683105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,10.622320175170898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,9.843247731526693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,1.8183040618896484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,10.613429387410482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,1.8349653879801433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,1.7052799860636394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,10.622821172078451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,9.848938624064127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,1.8275359471638997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,10.629770914713541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,1.8424852689107258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,1.7146612803141277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,10.648384094238281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,9.859386444091797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,1.8414506912231445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,10.664288202921549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,1.8605440457661946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,1.7336907386779785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,10.680676778157553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,9.879744211832682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.1280799706776936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,5.624165217081706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.1560213565826416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,1.1051572958628337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,5.65127436319987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,5.229450543721517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,1.0138986905415852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,5.4616959889729815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,1.0203093687693279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,0.9557653268178304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,5.468149185180664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,5.076282819112142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,1.0167466799418132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,5.4640318552653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,1.0227253437042236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,0.9607040087381998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,5.473530451456706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,5.0799252192179365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,1.019541343053182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,5.4737599690755205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,1.0285120010375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,0.9661440054575602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,5.4810136159261065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,5.084693272908528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,1.0267306963602703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,5.489626566569011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,1.0353813171386719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,0.9728373686472574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,5.497664133707683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,5.0939146677653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,0.7715520064036051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,3.070357322692871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,0.7715413570404053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,128,1,fp8,fp8,0,0.7314240137736002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,3.0704053243001304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,2.8403520584106445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,0.7688266436258951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,3.045215924580892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,0.7694453398386637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,0.7302026748657227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,3.0446879069010415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,2.8377707799275718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,0.7701226870218912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,3.047423998514811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,0.7697493235270182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,0.7305493354797363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,3.0468854904174805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,2.837871869405111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,0.7687786420186361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,3.0472586949666343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,0.7697599728902181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,0.7312800089518229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,3.0477654139200845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,2.837946573893229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,0.7684480349222819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,3.052133242289225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,0.7699413299560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,0.7301332950592041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,3.0538880030314126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,2.8388214111328125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,2.5884265899658203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,12.339088439941406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,2.610901355743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,2.4093333880106607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.02479466547568639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,12.359055836995443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,2.5970400174458823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,11.45263417561849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,2.619685331980387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,12.356986999511719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,2.430586655934652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,12.375125885009766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,11.460688273111979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,2.609370708465576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,2.6322453816731772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,2.4490453402201333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,12.364480336507162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,12.40066655476888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,11.480037689208984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,2.631194591522217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,2.6561813354492188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,2.4782506624857583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,12.409632364908854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,12.446735382080078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,1.5428959528605144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,1.5803306897481282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,11.507450103759766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,1.4978879292805989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,6.511391957600911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,1.3618292808532715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,6.060261408487956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,6.55125363667806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,1.3732320467631023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,1.277898629506429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,6.276826858520508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,1.3802132606506348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,5.830752054850261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,6.286959966023763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,1.28329070409139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,1.3786293665568035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,6.294287999471028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,1.3814239501953125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,5.8353118896484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,6.297311782836914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,1.3833227157592773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,1.2896479765574138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,6.294277191162109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,1.3813759485880535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,6.303317387898763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,5.848335901896159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,1.3970880508422852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,1.303376038869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,6.317056020100911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,0.8490933577219645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,6.328426361083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,5.861743927001953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,0.8710827032725016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,0.8343093395233154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,3.3771254221598306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,0.7654346625010172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,3.150901476542155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,3.397263844807943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,0.7235679626464844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,0.7695573170979818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,3.26035213470459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,0.7673707008361816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,3.038058598836263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,3.2633654276529946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,0.7737706502278646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,0.7250773111979166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,3.2643839518229165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,0.7690133253733317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,3.0399840672810874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,3.269071896870931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,0.7360693613688151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,0.775983969370524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,3.2706079483032227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,0.775871992111206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,3.0411148071289062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,3.2710240681966147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,0.7816267013549805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,0.7365173498789469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,3.281738599141439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.582751989364624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,3.050858815511068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,3.284832000732422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.5841866731643677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.554208000500997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,1.8862239519755046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.5844586690266927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,1.885034720102946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,1.748517354329427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.5849599838256836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.5552266836166382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,1.8699146906534831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.584821343421936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,1.746463934580485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,1.8685812950134277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.5845333337783813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.5532960096995035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,1.8704746564229329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.5835359891255697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,1.7474026679992676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,1.8693599700927734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.584389328956604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.5537866751352946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,1.8718187014261882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.5846879879633585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,1.7453600565592449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,1.8723467191060383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.5842186609903971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.5548373460769653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,1.875381310780843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,1.7475147247314453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,1.873103936513265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,2.151045322418213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,2.168362617492676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,2.0038453737894693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,8.872735977172852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,2.1563092867533364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,8.884229024251303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,8.239770889282227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,2.1726506551106772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,2.015402634938558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,8.885536193847656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,8.25325838724772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,2.16540797551473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,8.896538416544596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,2.027397314707438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,2.1859572728474936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,8.90341313680013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,8.266127904256185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,2.18394136428833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,8.91865094502767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,2.0514346758524575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,2.2074987093607583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,8.933978398640951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.2842400074005127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,8.289610544840494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,8.953189214070639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.3163786729176838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,1.249834696451823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,4.726810773213704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.138218641281128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,4.757594744364421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,4.399301211039226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.147162675857544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,1.0676373640696208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,4.530879974365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,4.53819211324056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.1403573354085286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,4.215711911519368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.151034673055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,1.0722933610280354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,4.533904075622559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,4.548368136088054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,4.220117251078288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.1450239817301433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.1560373306274414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,1.0787519613901775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,4.539919853210449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.1535627047220867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,4.222794532775879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,4.558618545532227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.1659093697865803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,1.0896746317545574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,4.558575948079427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,4.571637471516927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,0.7109013398488363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,4.238645235697429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,0.7282079855600992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,0.6990880171457926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,2.4603519439697266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.6387626727422079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,2.4807467460632324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,2.303866704305013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.643887996673584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,0.6055893500645956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,2.366864045461019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.6402186552683512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,2.3728747367858887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,2.2101759910583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.6467466751734415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,0.6078506708145142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,2.3695786794026694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.6437813440958658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,2.2133119901021323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,2.3770666122436523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,0.6108959913253784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.6499893267949423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,2.375498612721761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.6463093360265096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,2.216266632080078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,2.3810027440389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.6547573407491049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,0.616378664970398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,2.3836053212483725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.489029328028361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,2.2222240765889487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,2.3906505902608237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.48851199944814044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.46355732282002765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,1.3964266777038574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.48658132553100586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,1.3977012634277344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,1.298730691274007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.485861341158549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.4635733366012573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,1.3837226231892903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.4859520196914673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,1.2950773239135742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,1.3845973014831543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.48667200406392414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.461733341217041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,1.385690689086914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,1.2962186336517334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.48738133907318115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,1.385642687479655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.48655998706817627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.4620853265126546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,1.3866453170776367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,1.296832005182902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,1.3866987228393555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.4861973524093628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.48683734734853107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,1.389296054840088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,1.388495922088623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,1.297221342722575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,3.3642826080322266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,3.3893601099650064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,3.1398134231567383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,11.82644780476888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,11.841407775878906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,10.984751383463541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,3.402031898498535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,3.427061398824056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,3.184170722961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,11.872815450032553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,11.886469523111979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,11.030080159505209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,3.422602653503418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,3.4492479960123696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,3.2069972356160483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,11.906906127929688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,11.92355219523112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,3.4549385706583657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,11.05507787068685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,3.4847946166992188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,3.2532374064127603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,11.958058675130209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,1.9635252952575684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,11.990848541259766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,11.095466613769531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,2.0105226834615073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,1.9055040677388508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,6.2813975016276045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,1.7166666984558105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,6.328634897867839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,5.8585866292317705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,1.7340426445007324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,1.6048852602640789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,5.971712112426758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,1.720021406809489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,5.550330479939778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,5.988314946492513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,1.7382933298746746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,1.6116906801859539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,5.975503921508789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,1.7287786801656086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,5.5559946695963545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,5.996965408325195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,1.7474613189697266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,1.622693379720052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,5.9930775960286455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,1.7428852717081706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,5.568085352579753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,6.013690948486328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,1.6404159863789876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,1.7633066177368164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,6.018357594807942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,1.0281919638315837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,5.586591720581055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,3.2132479349772134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,6.042575836181641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,1.0549919605255127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,1.0037973721822102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,0.9123573303222656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,3.2408908208211265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,3.005978584289551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,0.9191466967264811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,0.8582026958465576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,3.063536008199056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,0.9137600262959799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,3.074672063191732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,2.8550774256388345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,0.9222986698150635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,0.8609546820322672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,3.0679092407226562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,0.9184799989064535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,3.0774399439493814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,2.859759966532389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,0.9257973035176595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,0.8675413131713867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,3.0753440856933594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,0.9246773719787598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,3.085599899291992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,2.8641974131266275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,0.9347413380940756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,3.089461326599121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,3.102421442667643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.57096000512441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,2.871802647908529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.5856159925460815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,1.6913173993428547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,0.562170664469401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,1.7056320508321126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,1.5850826899210613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.5137866735458374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.5173173348108927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,1.6168640454610188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,0.48790399233500165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,1.6208532651265461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,1.5109225908915203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.5168533325195312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.5186560153961182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,1.6194666226704915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,0.4904160102208455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,1.5135413805643718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,1.6252907117207844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.5184266567230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.5226186513900757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,1.6236213048299153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,0.4934346675872803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,1.6279306411743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,1.5146400133768718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.5213919878005981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.5268959999084473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,1.6283094088236492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,0.5022079944610596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,1.6338079770406086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,1.5188426971435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.3940906524658203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,0.984106699625651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.3956906795501709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.37603731950124103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,0.9812906583150228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,0.9124213059743246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.39239466190338135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,0.9687893390655518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.39450132846832275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.3754613399505615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,0.9701973597208658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.39234666029612225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,0.9117386341094971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.3949386676152547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,0.969749371210734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.3760853211085002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,0.9700053532918295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,0.9114240010579427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.3940266768137614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.39414934317270917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,0.9751839637756348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.3757919867833455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,0.9750133355458578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,0.9104320208231608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.3959893385569255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.3946559826533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,0.9735946655273438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.3760266701380412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,0.97380264600118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,0.9134933153788248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,2.5025013287862143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,2.523109277089437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,7.1517759958903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,2.3274025917053223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,6.634047826131185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,7.171498616536458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,2.512928009033203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,2.534320036570231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,7.166720072428386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,2.3510986963907876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,7.188570658365886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,6.658175786336263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,2.5268266995747886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,2.549546718597412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,7.187072118123372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,2.371653397878011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,7.211999893188477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,6.681423823038737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,2.5472426414489746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,2.570645332336426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,7.228517532348633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,2.396554629007975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,6.704309463500977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,7.252325057983398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,1.4713172912597656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,1.5040906270345051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,3.8547627131144204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,1.425173282623291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,3.888853391011556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,3.6030826568603516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,1.2888320287068684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,1.3002399603525798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,3.6297972997029624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,3.641061464945475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,3.3772853215535483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,1.2912000020345051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,3.6360479990641275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,1.3033920129140217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,1.2100799878438313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,3.647573471069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,3.384341239929199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,1.2977333068847656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,3.647850672403971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,1.3109920024871826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,1.2175146738688152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,3.6599998474121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,3.3890933990478516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,1.3072106838226318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,3.6663573582967124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,1.3227732976277669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,1.231386661529541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,3.678405443827311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,3.4042399724324546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,0.7756266593933105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,1.9859733581542969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,0.7953973611195883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,0.7566613356272379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,2.004762649536133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,1.863327980041504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,0.688431978225708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,1.877285321553548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,0.6931467056274414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,0.6479893525441488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,1.8833333651224773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,1.7519359588623047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,0.6903839906056722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,1.8811893463134766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,0.6952426433563232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,0.652021328608195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,1.8871359825134277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,1.7549920082092285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,0.6928640206654867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,1.8846240043640137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,0.6992053190867106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,0.654421329498291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,1.8931786219278972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,1.7585652669270833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,0.6993973255157471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,1.8965013821919758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,0.7060693105061849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,0.6604640086491903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,1.9029866854349773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,1.7653493881225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.43375468254089355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,1.0563093026479085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.4447253147761027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.4268480141957601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,1.067733367284139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,0.9973546663920084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.389903982480367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,1.00326935450236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.3937866687774658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.3715626796086629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,1.0061919689178467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,0.9391252994537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.38976534207661945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,1.0046026706695557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.3945173422495524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.37284799416859943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,1.0073973337809246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,0.9425600369771322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.3937866687774658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,1.0091626644134521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.39659734567006427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.3750186761220296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,1.01091734568278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,0.9440586566925049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.39581334590911865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,1.014522631963094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.4002133210500081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.3787200053532918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,1.0182452996571858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,0.9481546878814697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.30216532945632935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.6332746744155884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.3033813238143921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.287498672803243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.6319413185119629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.5902666648228964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.2995413343111674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.6260693470637003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.30031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.28564266363779706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.625327984491984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.5882453521092733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.3006719946861267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.6258293390274048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.3010080059369405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.2857439915339152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.6254986524581909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.588592012723287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.30052266518274945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.6256586710611979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.3003679911295573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.28619199991226196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.6261546611785889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.5883359909057617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.30136533578236896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.6279626687367758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.30165332555770874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.28572267293930054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.6282399892807007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.5879840056101481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,3.302330652872721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,7.193413416544597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,3.3281227747599282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,3.078394571940104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,7.217754364013672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,6.695562362670898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,3.3431307474772134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,7.2364959716796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,3.3689759572347007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,3.124314626057943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,7.25933837890625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,6.741861343383789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,3.361839930216471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,7.264981587727864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,3.3884105682373047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,3.150298754374186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,7.290442784627278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,6.766725540161133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,3.395845413208008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,7.311381022135417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,3.422133445739746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,3.1875893274943032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,7.335472106933594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,6.8056214650472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,1.9102400143941243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,3.9106667836507163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,1.9492799441019695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,1.8468480110168457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,3.9485387802124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,3.6753546396891275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,1.6623999277750652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,3.6189438501993814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,1.6765440305074055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,1.5499359766642253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,3.634127934773763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,3.36896546681722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,1.666927973429362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,3.6273972193400064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,1.6835254033406575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,1.5563146273295085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,3.642383893330892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,3.376192092895508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,1.6781333287556965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,3.6417760848999023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,1.694533348083496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,1.567855993906657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,3.65667724609375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,3.38916810353597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,1.6903786659240723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,3.6613438924153647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,1.7088425954182942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,1.5856107076009114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,3.678271929423014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,3.4074185689290366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,0.9838773409525553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,1.9956639607747395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,1.006442705790202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,0.953109343846639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,2.0176053047180176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,1.8787733713785808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,0.8629493713378906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,1.8547946612040203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,0.8717652956644694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,0.8084906737009684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,1.8621226946512859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,1.7303573290507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,0.8668533166249593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,1.8586773872375488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,0.875269333521525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,0.8118133544921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,1.8655039469401042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,1.7346293131510417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,0.8698293368021647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,1.8646453221638997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,0.8795999685923258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,0.8179360230763754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,1.8736000061035156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,1.7390720049540203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,0.8762986660003662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,1.8738400141398113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,0.8864853382110596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,0.8260800043741862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,1.884885311126709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,1.7472586631774902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.5226346651713053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,1.0404480298360188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.5360906521479288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,0.510757327079773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,1.053920030593872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,0.9849066734313965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.4623680114746094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,0.9702880382537842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.4660960038503011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.4384640057881673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,0.974560022354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,0.9110613663991293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.46401067574818927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,0.9710933367411295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.4676320155461629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.4397653341293335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,0.9781546592712402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,0.9124746322631836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.46645867824554443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,0.9756746292114258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.47115198771158856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.44252268473307294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,0.9800533453623453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,0.9153013229370117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.4715786774953206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,0.9817759990692139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.47510401407877606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.4465973377227783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,0.9861546357472738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,0.9200533231099447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.2956479986508687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.5660746494928995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.3023413419723511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.29148799180984497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.5744106769561768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.5398559967676798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.26311467091242474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.5279946724573771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.26306132475535077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.2521493236223857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.5290186802546183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.5006826718648275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.26158400376637775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.5293066501617432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.2645813425381978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.25464532772699994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.5327146848042806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.5033013423283895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.26314665873845416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.5313013394673666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.2659999926884969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.255130668481191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.5325280030568441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.5048480033874512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.2688960035641988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.5358773469924927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.2701173424720764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.2582186659177144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.5371520121892294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.5058133204778036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.20586667458216348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.35602664947509766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.20668800671895346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.19627734025319418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.35550932089487713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.33426133791605633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.20344533522923788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.3508640130360921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.20323199033737183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.19320533672968546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.3490026791890462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.3295680085817973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.2019466757774353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.34916265805562335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.20360000928243002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.19341866175333658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.3511360088984172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.329312006632487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.2025973399480184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.3514453172683716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.2034719983736674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.19375999768575033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.3604480028152466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.3397013346354167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.2016800045967102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.3524479866027832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.20364266633987427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.1954186757405599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.35093335310618085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.3327733278274536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,2.4557813008626304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,4.514677365620931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,2.4770560264587402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,2.28221861521403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,4.535429318745931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,4.206000010172526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,2.4703466097513833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,4.530783971150716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,2.4902453422546387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,2.305957317352295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,4.553493181864421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,4.231301307678223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,2.485429286956787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,4.551024119059245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,2.506389300028483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,2.323434670766195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,4.572618802388509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,4.24728520711263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,2.5073973337809243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,4.582874615987142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,2.529365380605062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,2.349717299143473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,4.602410634358724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,4.275039990743001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,1.433461348215739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,2.4990720748901367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,1.4616479873657227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,1.3833600680033367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,2.5273280143737793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,2.358778635660807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,1.2486293315887451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,2.2882827123006186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.261029322942098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,1.1651519934336345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,2.300325393676758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,2.1350560188293457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,1.253007968266805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,2.2942612965901694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.2661973635355632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,1.1705013116200764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,2.3052426973978677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,2.1405280431111655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,1.260639985402425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,2.3030293782552085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.2732960383097331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,1.1777973175048828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,2.315253257751465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,2.1470559438069663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,1.271498680114746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,2.3178186416625977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,1.2852853139241536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,1.1919840176900227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,2.331765333811442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,2.162783940633138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,0.7414613564809164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,1.2834933598836262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,0.7600159645080566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,0.721887985865275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.3017600377400715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.2157440185546875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,0.6515306631724039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.180239995320638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,0.6573226849238077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,0.6119573513666788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.1864426930745442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,1.1040480136871338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,0.6550986766815186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.1824853420257568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,0.659333348274231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,0.6148533423741659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.1897546450297039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,1.1067413489023845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,0.6563466787338257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.1870453357696533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,0.6634186506271362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,0.6182560125986735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.1937119960784912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,1.112277348836263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,0.6631893316904703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.193562666575114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,0.6702986558278402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,0.6232479810714722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.201701323191325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,1.118058681488037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.39742398262023926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,0.6760266621907552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.40640532970428467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.3882453441619873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,0.6869813601175944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.6446826855341593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.3502986828486125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.6212960084279379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.35311468442281085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.3328160047531128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.6257280111312866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.5892693201700846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.3512266476949056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.6247040033340454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.3548640012741089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.33478931585947674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.6275466680526733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.5908480087916056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.35328535238901776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.6261546611785889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.3571840127309163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.336575984954834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.6301920016606649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.5932480096817017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.3569386800130208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.6319253444671631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.3612266778945923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.3389973243077596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.6341386636098226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.5946933428446451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.22602667411168417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.3757280111312866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.23264533281326294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.22292266289393106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.3799946705500285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.3593440055847168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.1987839937210083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.34388267993927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.20011732975641885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.19200533628463745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.3451146682103475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.32941333452860516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.19944000244140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.34330133597056073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.20057600736618042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.1924053430557251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.3453173240025838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.3301600019137065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.20074133078257242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.34492266178131104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.20158400138219199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.19529066483179727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.348527987798055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.3314133286476135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.20323199033737183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.3476639986038208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.2037973403930664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.19777067502339682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.35100265343983966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.3344533443450928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.1605280041694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.2440053423245748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.16224533319473267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.15225066741307577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.2423893411954244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.22924266258875528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.15820266803105673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.2383306622505188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.15706132849057516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.14974400401115417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.24099733432133993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.22894400358200073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.16032532850901285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.23982399702072144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.15892266233762106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.15056533614794412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.2389919956525167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.22637333472569784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.15705600380897522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.23906133572260538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.15867732961972555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.15026666720708212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.2387733260790507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.22691200176874796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.15810666481653848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.2400266726811727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.15853866934776306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.15016532937685648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.23846399784088135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.2263573408126831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,3.2784105936686196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,4.879066785176595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,3.3006347020467124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,3.037973403930664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,4.898853302001953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,4.54205862681071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,3.324864069620768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,4.925605456034343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,3.3359785079956055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,3.0618985493977866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.4639306863149007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,4.934634526570638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,4.56497065226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,3.32914129892985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,3.350309371948242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,4.92904535929362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,3.0827414194742837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,4.95308272043864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,4.588512102762858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,3.381066640218099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,3.3961865107218423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,4.999482790629069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,3.118618647257487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,5.01034673055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,1.8856159845987956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,4.6265974044799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,1.9143306414286296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,2.724368095397949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,1.816186745961507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,2.752133369445801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,2.5820159912109375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,1.6325333913167317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,1.6473066012064617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,2.440608024597168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,1.518170674641927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,2.447866598765055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,2.2752052942911782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,1.6391305923461914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,2.4452692667643228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,1.6702879269917805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,1.5270719528198242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,2.457706610361735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,2.2849225997924805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,1.6486186981201172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,2.457551956176758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,1.664906660715739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,1.5402560234069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,2.4733333587646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,2.296544075012207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,1.6640052795410156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,2.474287986755371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,1.6774187088012695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,1.5540374120076497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,2.4930879275004068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,2.312346617380778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,0.9605759779612223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,0.9798399607340494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,1.385423978169759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,0.9278773466746012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,1.403813362121582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.315872033437093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,0.8367733160654703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,0.8443573315938314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.2442453702290852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,0.7800532976786295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.2514560222625732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,1.1649493376413982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,0.8408213456471761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.2496586640675862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,0.8494346936543783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,0.7854773203531901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.2562719980875652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,1.1689866383870442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,0.8449546496073405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,0.8525173664093018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.253941297531128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,0.7968160311381022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.262442668279012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,1.1748533248901367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,0.852463960647583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,0.8616639773050944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.2621813615163167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,0.7997386455535889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.2732640107472737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,1.1831413110097249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.49863465627034503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,0.7242773373921713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.5099680026372274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.4862346649169922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,0.7279307047526041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,0.6867520014444987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.43537068367004395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.44525333245595294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.41305601596832275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.6499199867248535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.6091839869817098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.44388798872629803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,0.6528000036875407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.4412533442179362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.414522647857666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.6606293519337972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.611840009689331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.4413599967956543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,0.653274655342102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.4464213450749715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.4169013500213623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,0.6569226582845052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.615226666132609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.4505600134531657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,0.6563786665598551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.4498560031255086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.42098132769266766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,0.6630133390426636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.6191466649373373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.2693706750869751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.38389865557352704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.2773546576499939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.26763733228047687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.3897973299026489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.36763731638590497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.23362666368484497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.3435680071512858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.23614400625228882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.22657599051793417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.3460746606190999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.3303040067354838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.23648534218470255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.3468586603800456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.2379253307978312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.22815465927124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.3475946585337321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.33059199651082355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.23498133818308511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.34776000181833905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.23935467004776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.23015999794006348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.3492799997329712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.332533339659373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.24041599035263062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.3512586752573649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.24295467138290405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.23220266898473105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.35339732964833576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.33482666810353595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.15400532881418863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.21835199991861978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.1586186687151591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.15440533558527628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.22180799643198648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.21126399437586466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.1952106753985087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.13435199856758118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.12774399916330972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.19780266284942627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.18505599101384482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.1341759959856669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.19780800739924112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.13802133003870645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.12948266665140787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.1995946764945984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.18497065703074136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.13387733697891235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.19767467180887857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.13782399892807007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.12969066699345908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.19933867454528809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.18706132968266806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.136272003253301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.19949867328008017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.13991999626159668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.13171199957529703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.20011732975641885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.19130132595698038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.11148800452550252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.1479626695315043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.10728533069292705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.1457919975121816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.14058132966359457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.10970133543014526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,0.8745333353678385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.14643200238545737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.11132267117500305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.10719999670982361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.14871999621391296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.11251733700434367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.14692266782124838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.11181867122650146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.10661866267522176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.14613333344459534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.11131200194358826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.13995200395584106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.1462453305721283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.11175466577212016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.10763200124104817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.1452959974606832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.14010666807492575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.10988799730936687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.14641599853833517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.11134399970372517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.10687999924023946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.1402453382809957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,2.433786710103353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,3.212405204772949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,2.448959986368815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,2.2552053133646646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,3.229914665222168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,2.9947306315104165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,2.4609813690185547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,3.239215850830078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,2.4712746938069663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,2.2734559377034507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,3.251615842183431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,3.0138025283813477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,2.485205332438151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,3.267162640889486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,2.492784023284912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,2.292229334513346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,3.2748533884684243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,3.033146540323893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,2.5053440729777017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,3.2911465962727866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,2.5081119537353516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,2.3223519325256348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,3.2978668212890625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,3.0610132217407227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,1.414207935333252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,1.8298719724019368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,1.4385333061218262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,1.3633440335591633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,1.8507466316223145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,1.741909344991048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.2235199610392253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,1.6157174110412598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.235594669977824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,1.1391733487447102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,1.628159999847412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,1.5117653210957844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.2313600381215413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,1.6239840189615886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.242677370707194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,1.1485013167063396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,1.6350560188293457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,1.5218186378479004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.2382240295410156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,1.6338399251302083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.249008019765218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,1.155674695968628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,1.6435252825419109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,1.5290452639261882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.2505813439687092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.2621546586354573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,1.643818696339925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,1.1686560312906902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,1.6577706336975098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,1.556074619293213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,0.7357227007548014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,0.7383999824523926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,0.935856024424235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,0.6996959845225016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,0.9469760258992513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,0.891045331954956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.629589319229126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.6351306835810343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,0.8297066688537598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,0.5881760120391846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,0.8354880015055338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,0.779029369354248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.633626659711202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,0.8323840300242106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.6483253240585327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,0.5925920009613037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,0.8384799957275391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,0.7849226792653402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,0.6351360082626343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,0.8383039633433024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.6428746779759725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,0.596778670946757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,0.844650665918986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,0.788703997929891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,0.6412853399912516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,0.8581439654032389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.6474186579386393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,0.603488008181254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,0.851248025894165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,0.7944160302480062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.487498680750529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.3795679807662964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.387722651163737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.368341326713562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.50709335009257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.468506654103597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.33680001894632977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.43142398198445636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.33266133069992065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.31303467353185016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.4361813465754191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.41276268164316815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.3304746747016907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.4342399835586548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.3324906627337138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.31459200382232666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.4365226825078328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.41333333651224774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.33262399832407635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.43721067905426025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.3367786804835002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.31785066922505695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.44019198417663574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.41708266735076904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.33692800998687744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.4413439830144246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.34093332290649414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.3197439908981323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.45400532086690265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.41837334632873535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.20998932917912802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.21176000436147055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.20374399423599243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.26942400137583417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.25641600290934247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.1771893302599589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.23244265715281168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.17930134137471518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.1727786660194397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.2362933357556661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.22615466515223184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.17723200718561807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.23276267449061075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.17921600739161173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.17299199104309082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.23618666330973306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.22631466388702393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.17851734161376953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.23438932498296103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.18093333641688028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.17595199743906656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.23709867397944132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.228383998076121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.1811306675275167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.2365600069363912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.18301333983739218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.17708800236384073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.24090667565663657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.2323413292566935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.11985066533088684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.12185600399971008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.1209386686484019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.15548800428708395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.15109333395957947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.136272003253301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.10596266388893127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.10014399886131287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.13831999897956848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.13143466909726462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.10523200035095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.1361066699028015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.10687466462453206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.1381066640218099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.13065066933631897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.10560533404350281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.13674666484196982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.1076800028483073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.1013866662979126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.13803199927012125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.13005866607030234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.10567466417948405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.13792533675829569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.10902399818102519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.1036906639734904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.13974933822949728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.1337279975414276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.10700266559918721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,1.2063039938608806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.08292800188064575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.10698133707046509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.10328533252080281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.08902933200200398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.10765332976977031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.08409066994984944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.08784533540407817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.1071573297182719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.08665066957473755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.10672533512115479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.08322666585445404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.10645866394042969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.1035146713256836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.08686932921409607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.08691733082135518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.08342933654785156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.10753599802652995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.10317867000897725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.08712533116340637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.10774399836858113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.08287466565767924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.10687466462453206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.10131200154622395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,2.9269227981567383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,3.4274988174438477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,2.9168532689412436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,2.875743865966797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,3.410090764363607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,3.356858571370443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,2.955199877421061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,3.4461867014567056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,2.9370667139689126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,2.956554730733236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,3.430154800415039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,3.434896151224772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,3.029786745707194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,3.5210612614949546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,3.006160100301107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,2.9573227564493814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,3.439903895060221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,3.5031681060791016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,3.114656130472819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,3.636773427327474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,3.0759359995524087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,3.157557487487793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,3.5766026178995767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,3.6452372868855796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,1.613925298055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,1.87882661819458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,1.6068000793457031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,1.6321813265482585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,1.8843520482381184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,1.8773600260416667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,1.4808212916056316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,1.7251946131388347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,1.4737706184387207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,1.430799961090088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,1.7193279266357422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,1.6702399253845215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,1.4893546104431152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,1.73416535059611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,1.4822079340616863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,1.4529013633728027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,1.732026735941569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,1.6900639533996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,1.4976000785827637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,1.7359894116719563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,1.4889225959777832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,1.466767946879069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,1.7382399241129558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,1.699130694071452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,1.5491199493408203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,1.7793973286946614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,1.4964906374613445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,1.5801973342895508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,1.7528266906738281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,1.824021339416504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,0.8001759847005209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,0.7958880265553793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,0.9387093385060629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,0.8209333419799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,0.926042636235555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,0.7490293184916178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,0.8723626931508383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,0.7457760175069174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,0.7073333263397217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,0.8730613390604655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,0.8292480309804281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,0.7537546952565511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,0.8784800370534261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,0.7502880096435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,0.7250773111979166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,0.8771359920501709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,0.8547039826711019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,0.7520319620768229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,0.880085309346517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,0.7541066805521647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,0.719813346862793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,0.8788800239562988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,0.8437919616699219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,0.7604266802469889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,0.8879093329111735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,0.7549440066019694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,0.797813336054484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,0.8829066753387451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,0.903119961420695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.4110666513442993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.4805226723353068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.4011146624883016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.42081598440806073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.48470401763916016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.4809439977010091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.3821706771850586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.44785066445668537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.3822133143742879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.36324799060821533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.4476693471272786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.4248640139897664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.38420267899831134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.4593973159790039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.39182400703430176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.3698986768722534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.4459199905395508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.4317440191904704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.3856853246688843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.45075734456380206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.38465599219004315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.3686879873275757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.449839989344279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.4334239959716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.3895999987920125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.454367995262146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.38766932487487793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.38147199153900146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.45216532548268634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.4437173207600911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.21741867065429688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.2555306752522786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.21395200490951538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.21913599967956543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.25099732478459674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.25462400913238525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.2018079956372579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.23427732785542807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.20076799392700195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.1914880077044169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.23491199811299643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.22433600823084512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.20247467358907065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.23538132508595785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.20150933663050333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.1932906707127889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.23612799247105917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.2261013388633728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.20114133755366007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.23559999465942383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.20217067003250122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.1941759983698527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.2351093292236328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.2264853318532308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.204474667708079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.2386666735013326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.20355733235677084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.1967839996019999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.2373759945233663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.23009600241978964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.11733333269755046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.13983999689420065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.11646399895350139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.12011733651161194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.13822933038075766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.13843733072280884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.10549867153167725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.12788266936937967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.1070240040620168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.10170132915178935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.12733333309491476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.12030933300654094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.1055519978205363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.10744532942771912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.10240000486373901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.12809066971143088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.12131200234095256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.1067039966583252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.12754666805267334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.10688533385594685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.1039520005385081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.12949333588282266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.12158933281898499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.1074773371219635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.1290720005830129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.10802666346232097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.10541866223017375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.1288373370965322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.12408000230789185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.06735466420650482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.0791733314593633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.06682666639486949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.07087466617425282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.07926933467388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.06445333361625671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.07587733368078868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.06440000236034393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.06274666885534923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.07681599756081899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.07337066531181335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.06422933439413707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.07590400179227193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.07282133400440216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.06409066418806712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.07689600189526875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.06538133323192596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.07681599756081899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.07299200197060902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.07717866698900859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.0653653343518575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.07585066556930542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.07311466832955678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.05162133276462555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.051642666260401406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.048858667413393654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.04120533416668574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.04866666595141093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.04874666531880697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.04011200120051702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,2.838085174560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,2.851680119832357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,2.8269545237223306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,2.7933225631713867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,2.8489761352539062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,2.8250932693481445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,2.86081600189209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,2.878314654032389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,2.8562399546305337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,2.8718719482421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,2.874197324117025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,2.898906707763672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,2.943135897318522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,2.9724960327148438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,2.937626520792643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,2.8776426315307617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,2.9569066365559897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,2.911109288533529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,3.0225226084391275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,3.035162607828776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,2.9908266067504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,3.0769065221150718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,3.023029327392578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,3.114586512247721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,1.5627840360005696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,1.5826667149861653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,1.5567199389139812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,1.5834879875183105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,1.5741333961486816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,1.5999520619710286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,1.4289065996805828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,1.438144048055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,1.4197600682576497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,1.3877760569254558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,1.4308106104532878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.4070347150166829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,1.4445974032084148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,1.4511680603027344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,1.4362239837646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,1.4120267232259114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,1.441637357076009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.433909257253011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,1.4463733037312825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,1.4555145899454753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,1.4452533721923828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,1.4439999262491863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,1.4513012568155925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,1.4304107030232747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,1.4756320317586262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,1.4867146809895833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,1.4552906354268391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,1.5343839327494304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,1.4661973317464192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,1.5514666239420574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,0.7899999618530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,0.7890933354695638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,0.7663520177205404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,0.7905706564585367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,0.7802026271820068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,0.8015519777933756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,0.7258773644765218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,0.7311680316925049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,0.7251733144124349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,0.702735980351766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,0.7255840301513672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.6965386867523193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,0.7466773192087809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,0.7346346378326416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,0.7284586429595947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,0.7042559782663981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,0.7504479885101318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.7097280025482178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,0.729850689570109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,0.7353706359863281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,0.7291466395060221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,0.6986186504364014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,0.7359573046366373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,0.7202133337656657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,0.7383840084075928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,0.7475787003835043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,0.7384160359700521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,0.7615093390146891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,0.7416106859842936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,0.7696639696756998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.39655999342600506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.4050186475118001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.39238933722178143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.4051733414332072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.3975520133972168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.40957868099212646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.3729493220647176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.3752426703770955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.3715573151906331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.3524800141652425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.37876800696055096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.35797866185506183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.3731786807378133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.3739680051803589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.3715626796086629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.36587735017140705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.37201066811879474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.3617333173751831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.37298134962717694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.3773333231608073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.3739200035730998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.36271464824676514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.3766719897588094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.3614186843236287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.3784746726353963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.382149338722229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.3768373330434163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.3729333480199178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.3790880044301351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.3715840180714925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.21030400196711221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.21474667390187582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.21462933222452799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.20956265926361084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.21670933564503989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.1945120096206665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.19529600938161215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.19447465737660727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.18668800592422485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.1964213252067566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.18674665689468384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.19483200709025064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.19664533933003744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.1956160068511963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.1879733403523763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.19633066654205322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.19007466236750284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.194757342338562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.19546133279800415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.19509865840276083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.18888000647226968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.19738666216532388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.19093332688013712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.1977333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.19827733437220255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.19708800315856934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.19241599241892496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.19825067122777304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.19343467553456625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.1141919990380605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.11607467134793599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.11354133486747742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.1183733344078064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.11571199695269267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.12007466952006023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.1056106686592102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.1053706705570221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.10409599542617798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.0993173321088155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.10261332988739014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.10485333204269409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.10522666573524475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.10539199908574422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.10295466581980388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.1055413285891215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.10548800230026245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.10487467050552368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.10117333134015401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.10553066929181416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.10242666800816853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.10788800319035848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.10319466392199199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.10683199763298035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.0666186660528183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.06763199965159099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.06575466692447662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.06645333270231883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.06932266553243001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.06337599953015645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.06346133351325989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.06323733429114024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.06233066817124685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.06419200201829274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.06353066861629486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.062650665640831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.06178666651248932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.06284800171852112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.06413333117961884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.06354133288065593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.06310933331648509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.06154666841030121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.06321066617965698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.062421331803003945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.061066667238871254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.040448000033696495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.04053333401679993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.03990400085846583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.03849600007136663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.04136000076929728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.040181333820025124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.041221333046754204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.3291040261586506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.3041386604309082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.327290693918864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,1.280997355779012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.2929866313934326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.2537439664204915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.32587734858195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.299184004465739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.3214879830678303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,1.3004000186920166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.293023983637492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.2812799612681072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.330415964126587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.3074560165405273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.3290186723073323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,1.3068586985270183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.3018453121185303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.2860639890034993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.376479943593343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.3537599245707195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.3470560709635417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,1.4169012705485027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.3244266510009766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.4003626505533855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,0.7238346735636393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,0.7132480144500732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,0.7171093622843424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,0.738426685333252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,0.6992853482564291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.7277599970499674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.6870773633321127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.6587839921315511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.6698986689249674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,0.6340053478876749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.655130664507548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.6207733154296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.6705013116200765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.6571573416392008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.6681226889292399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,0.6477386554082235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.6536639928817749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.6350826819737753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.6732853253682455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.6610773404439291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.6727306842803955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,0.6445120175679525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.6609280109405518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.63482666015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.6818400224049886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.6680906613667806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.6766239802042643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,0.6971200307210287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.6641173362731934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.6895679632822672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.3691733280817668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.36405332883199054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.364464004834493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.38092267513275146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.35902400811513263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.3733493487040202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.34249599774678546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.33562131722768146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.342682679494222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.3246240019798279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.3357013463973999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.31850665807724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.342522660891215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.334666649500529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.340554674466451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.3330026666323344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.33419732252756756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.32601600885391235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.34485332171122235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.33696532249450684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.3442560036977132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.33272000153859455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.33743464946746826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.326255997021993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.3498239914576213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.3447839816411336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.34726933638254803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.3426080147425334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.33985598882039386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.3351093530654907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.19682133197784424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.1946186621983846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.1937333345413208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.2011573314666748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.1898720065752665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.19769599040349325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.18059200048446655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.17782400051752725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.18171199162801108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.17339199781417847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.17787200212478638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.1678719917933146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.17974932988484701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.17639466126759848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.18017067511876425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.17536000410715738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.17757866779963175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.17003200451533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.18172266085942587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.17826133966445923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.1816106637318929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.17587200800577799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.17757334311803183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.17108267545700073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.18311999241511026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.1795146663983663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.18311466773351034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.1796906590461731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.17959467569986978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.1758133371671041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.1069546639919281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.10356266299883525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.10479999581972758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.10988799730936687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.10331733028093974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.10713600118954976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.09659199913342793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.09361066420873006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.09636800487836202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.09325866897900899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.0946613351504008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.09077333410580952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.09693866968154907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.0934986670811971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.09510933359464009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.09237866600354512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.0977280040582021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.09485333164532979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.09710400303204854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.09493866562843323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.09583999713261922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.09328533212343852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.09948266545931499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.09617599844932556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.09788266817728679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.09530666470527649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.09520000219345093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.09438932935396831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.06317866841952006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.06206400195757548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.06506133576234181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.05983466903368632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.05724266668160757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.058373332023620605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.056373332937558494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.05773333211739858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.056474665800730385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.058880001306533813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.05842666824658712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.059450666109720864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.0562666654586792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.05708266794681549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.0574186642964681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.05784533421198527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.060133333007494606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.057071998715400696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.0588266650835673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.05799466868241628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.05903466542561849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.057664001981417336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.05772800246874491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.04002666721741358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.03619733452796936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.03633599976698557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.03595733394225439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.03728533287843069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.03706666578849157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.037317333122094475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.03654933224121729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.7064053217569987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.7059360345204672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.7049386501312256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.6809493700663248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.7057920296986898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.6822240352630615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.7056000232696533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.7049012978871664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.7034719785054525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.6945652961730957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.7029919624328613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.6952533721923828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.712277332941691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.7137706279754639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.7058026790618896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.6871253649393717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.7068853378295898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.6852959791819254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.7172533671061198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.717199961344401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.7101653416951498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.7373066743214926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.7100373109181722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.7353973388671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.38834134737650555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.38809065024058026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.38222400347391766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.3964800039927165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.38096535205841064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.39789867401123047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.36183468500773114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.36139734586079914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.35974399248758954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.34681065877278644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.3593440055847168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.34833065668741864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.3611573378245036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.3598133325576782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.35922666390736896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.3514133294423421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.3579733371734619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.35152534643809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.3617386817932129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.3628000020980835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.361786683400472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.34891732533772785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.36111998558044434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.3505226771036784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.3677813212076823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.3671146631240845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.3633546829223633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.3625599940617879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.36205867926279706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.36157333850860596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.20361600319544473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.20441067218780518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.20006932814915976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.20855466524759927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.19986132780710855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.20677334070205688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.1898933251698812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.18954666455586752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.18950400749842325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.18092799186706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.18894400199254355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.18011200428009033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.18822399775187174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.18942934274673462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.18824533621470133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.18241065740585327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.1874720056851705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.18381865819295248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.18984532356262207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.18987733125686646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.1893333395322164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.18203200896581015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.18796799580256143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.1837386687596639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.19149333238601685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.19146132469177246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.1896053353945414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.18610666195551553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.18937599658966064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.18808533747990927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.11030933260917664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.11014933387438457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.10918933153152466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.11241599917411804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.10743467013041179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.1125973363717397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.10121066371599834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.10243200262387593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.10199999809265137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.09638399879137675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.10108266274134318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.09755200147628784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.1030453344186147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.1023466686407725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.10335999727249146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.09551466504732768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.10128000378608704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.09697600205739339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.10126933455467224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.10238933563232422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.09866133332252502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.10219732920328777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.09788800279299419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.10249066352844238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.10341866811116536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.10003200173377991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.10274133086204529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.10000000397364299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.06285333136717479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.06341333190600078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.06288533409436543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.0650133341550827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.06520000100135803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.059018666545550026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05961599946022034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.056757330894470215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.05896000067392985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.05951466659704844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.056794668237368263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.05876799921194712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.059194669127464294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.06001600126425425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.058373332023620605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.05881066620349884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.0594400018453598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.05683733522891998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.03917866696914037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.03921066721280416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.040005333721637726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.03664533297220866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.03726933399836222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.03844266633192698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.5086506605148315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.508623997370402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.5072160164515177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.47723201910654706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.5055626630783081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.47916801770528156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.5069760084152222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.5060160160064697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.5042399962743124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.4824106693267822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.5048960049947103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.4822666645050049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.5065066814422607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.5070879856745402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.5042453209559122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.4780426820119222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.5024640162785848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.4771093527475993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.510208010673523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.5099306503931681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.5101120074590048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.4882933298746745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.5077226559321085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.4885653257369995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.27456533908843994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.27507734298706055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.2712533275286357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.2717546621958415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.27085866530736286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.2722240090370178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.2608746687571208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.26234134038289386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.26150399446487427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.24626133839289346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.261189341545105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.2462559938430786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.26046933730443317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.26177066564559937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.26197866598765057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.24790932734807333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.25940799713134766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.24782933791478476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.2611626585324605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.261407991250356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.2590559919675191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.24739199876785278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.2598399917284648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.24718934297561646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.2635040084520976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.2627146641413371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.26125333706537884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.25145065784454346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.26182399193445843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.2523306608200073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.1436853309472402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.14412267009417215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.14386666814486185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.14476799964904785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.14382400115331015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.1437493364016215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.1362879971663157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.13753066460291544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.1378933290640513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.12761066357294717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.1367093324661255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.12772799531618753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.13615467151006064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.1376213332017263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.13700266679128012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.12785067160924277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.13718400398890176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.13703466455141702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.13710400462150574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.1383573313554128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.12893866499265036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.13633599877357483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.12962133685747781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.13808533549308777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.13797332843144736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.1377173364162445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.13225600123405457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.13753066460291544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.07915733257929485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.0804906686147054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.07984533409277599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.08097599943478902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.07905066510041554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.07674666742483775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.07740266621112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.07785066465536754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.07750933369000752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.07276266813278198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.07664533456166585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.07650133470694225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.07422933479150136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.07741333544254303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.07720000048478444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,128,1,fp8,fp8,0,0.07342933118343353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.07260266443093617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.07657599945863088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.0764213353395462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.07653333246707916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.07675733168919881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.07460266848405202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.04738133152325948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.04604266583919525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.045423999428749084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.043925335009892784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.03197866678237915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.030058667063713074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.029930666089057922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.030080000559488933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.4073760112126668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.4057066837946574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.40404268105824787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.3762400150299072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.4047733147939046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.37587201595306396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.4050453503926595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.405290683110555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.4039040009180705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.3770506779352824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.40343467394510907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.37810667355855304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.40562665462493896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.40431467692057294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.4025973478953044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.3760586579640706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.4038240114847819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.3752906719843547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.40545066197713214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.4051520029703776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.40413331985473633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.37956265608469647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.40338134765625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.38125332196553546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.21637866894404092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.21588265895843506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.21530133485794067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.20837332804997763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.2142933408419291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.20965866247812906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.20865066846211752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.2084640065828959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.2079040010770162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.19314666589101157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.20789867639541626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.19369600216547647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.2093600034713745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.20905599991480509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.20955200990041098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.19286932547887167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.20785599946975708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.19348265727361044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.20921067396799722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.21080533663431802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.20940266052881876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.19349332650502524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.20756800969441733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.19475199778874716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.20964266856511435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.20961600542068481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.6474666595458984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.2085919976234436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.19828800360361734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.20804266134897867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.11351466178894043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.19751467307408652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.11451733112335205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.11541866262753804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.1129973332087199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.11567999919255574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.11169067025184631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.1116480032602946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.11122666796048482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.11364799737930298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.10469333330790202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.10408000151316325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.11195199688275655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.11152533690134685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.11191466450691223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.10525866349538167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.11340266466140747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.10427199800809224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.11294933160146077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.11120532949765523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.11333333452542622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.10520533720652263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.11359999577204387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.11246400078137715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.11133866508801778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.11194133758544922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.10607999563217163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.1053600013256073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.06632000207901001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.06677866478761037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.06618666648864746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.06617600222428639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.06454400221506755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.06010666489601135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.0644053320089976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.06458666423956554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.06459733347098033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.06445333361625671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.06118399898211161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.06085866689682007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.0402453343073527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.019802667200565338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.35425599416097003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.35461334387461346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.3537333408991496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.32585599025090534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.35476799805959064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.3266666730244954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.3545973300933838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.35313598314921063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.3532373507817586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.32734400033950806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.35123733679453534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.3270240028699239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.353221337000529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.3530720074971517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.35345598061879474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.3285226623217265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.3531786600748698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.32737600803375244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.35604266325632733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.3534613450368245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.3545440038045247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.32781867186228436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.35328535238901776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.32947733004887897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.1863200068473816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.1862773299217224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.18741865952809653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.18077333768208823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.18733332554499307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.18078400691350302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.1832533280054728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.18318400780359903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.18438400824864706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.17248000701268515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.17142399152119955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.18297600746154785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.18345600366592407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.18489599227905273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.17241599162419638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.18354666233062744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.17256534099578857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.18331199884414673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.18251200517018637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.1828320026397705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.17072532574335733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.1723573406537374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.18279467026392618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.18333866198857626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.1833440065383911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.17301867405573526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.18305599689483643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.1730560064315796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.10025599598884583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.10084799925486247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.10061333576838176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.09669867157936096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.09940800070762634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.0993386705716451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.0992746651172638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.09924800197283427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.0990133285522461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.09292266766230266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.09930133819580078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.09351999560991923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.09918933113416036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.09923733274141948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.0990773340066274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.09309333562850952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.09276266892751057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.09929600358009338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.09923199812571208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.09878933429718018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.09309333562850952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.09931199749310811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.09455999732017517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.0580320010582606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.05434666574001312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.057722667853037514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.05773866673310598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.057962665955225624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.05635733405749003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.05796800057093302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.05421866476535797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.0543093333641688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.034261333445707955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.03614933292071024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.03620799879233042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.03612266729275385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.03383466601371765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.020655999581019085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,2.7293761571248374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.26479466756184894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,2.7424586613972983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,2.494570732116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,15.859424591064453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,15.917716979980469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,2.744778633117676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,14.651130676269531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,2.765962600708008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,2.51694393157959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,15.90932846069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,15.917332967122396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,2.7802346547444663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,14.687957763671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,2.8037761052449546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,2.5653279622395835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,15.957472483317057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,15.987733205159506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,1.5920586585998535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,14.720277150472006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,1.6282240549723308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,8.281989415486654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,1.515168031056722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,8.318453470865885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,7.646298726399739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,1.4139307339986165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,1.4282080332438152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,1.2978453636169434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,8.034095764160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,0.9427146911621094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,1.4239786465962727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,8.044890721638998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,7.42298698425293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,1.4376266797383626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,1.3077973524729412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,8.051701227823893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,1.4385493596394856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,8.068005243937174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,7.433530807495117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,1.4523253440856934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,1.3267359733581543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,8.081205368041992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,0.8787199656168619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,8.096778869628906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,7.4537811279296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,0.8936213652292887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,0.8394986788431803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,4.270053227742513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,0.7913013299306234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,3.953157424926758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,4.317418734232585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,0.7982026735941569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,0.7328480084737142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,4.156079928080241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,0.7953120072682699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,3.8463414510091147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,4.159077326456706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,0.8017493089040121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,0.7364853223164877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,4.182709376017253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,4.1700747807820635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,0.8038986523946127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,3.852954546610514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,0.7476480007171631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,4.17250124613444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,4.195146560668945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.5970293283462524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,3.8555787404378257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.5968480110168457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.5574880043665568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,2.3452746073404946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,2.344485282897949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,2.1621972719828286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.5971466700236002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.5972586472829183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.558240016301473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,2.328106721242269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,2.160879929860433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,2.327866713205973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.5967573324839274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.596560001373291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,2.3281493186950684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.5582186778386434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,2.16046937306722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,2.32968537012736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.5958826541900635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.5959626833597819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,2.3320693969726562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.5581226746241251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,2.1611733436584473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,2.3334080378214517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,2.0209973653157554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,2.0350613594055176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,1.8453866640726726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,9.358624140421549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,9.377077102661133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,8.641232172648111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,2.035605271657308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,2.0526933670043945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,1.863866647084554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,9.37876828511556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,9.40006955464681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,2.05838934580485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,8.657808303833008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,2.077237288157145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,1.894927978515625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,9.424079895019531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,9.439050674438477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.1948373317718506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,8.69214948018392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.2230559984842937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,1.1366346677144368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,4.945610682169597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,4.9746294021606445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,4.573322614034017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,1.0647146701812744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,1.074896017710368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,0.9778026739756266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,4.769994735717773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,4.783781369527181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,4.408960024515788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,1.070367972056071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,1.0817440350850422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,0.98471466700236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,4.783114751180013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,4.794581413269043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,4.4185441335042315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,1.0826666355133057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.0947946707407634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,0.9986826578776041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,4.80187193552653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,4.8154293696085615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.6582933266957601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,4.433306694030762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.6756213506062826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,0.6344319979349772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,2.5745654106140137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,2.591386636098226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,2.3921119372049966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.5993280013402303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.6036320130030314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,0.5541066726048788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,2.502474625905355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,2.4943572680155435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,2.308842658996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.6012959877649943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.607258677482605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,0.5587679942448934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,2.502117315928141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,2.5017226537068686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,2.3137173652648926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.6069759925206503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.6126559972763062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,0.565280000368754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,2.5080374081929526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,2.5132907231648765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,2.3175360361735025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.4531466563542684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,1.4490399360656738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.4233599901199341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.4552319844563802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,1.3375412623087566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,1.4480479558308919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.4510720173517863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.4513813257217407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,1.4360052744547527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.42290135224660236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,1.436303933461507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,1.3347679773966472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.45287466049194336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.45396800835927326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,1.4388267199198406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.4227946599324544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,1.334368069966634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,1.439029375712077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.4532639980316162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.4524799982706706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,1.4414025942484539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.4241386651992798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,1.44157870610555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,1.335327943166097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,1.6800106366475422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,1.695365269978841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,1.5337546666463215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,6.739706675211589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,6.76307741800944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,6.223029454549153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,1.6908960342407227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,1.7058186531066895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,1.5470293362935383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,6.760437647501628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,6.77888552347819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,6.235690434773763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,1.7092587153116863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,1.7287839253743489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,1.5713386535644531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,6.7934614817301435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,6.816666920979817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,0.9970453580220541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,6.262213389078776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,1.0198240280151367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,0.9479359785715739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,3.595184008280436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,3.6170291900634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,3.3289012908935547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,0.889573335647583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,0.8983573118845621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,0.8171839714050293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,3.4556105931599936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,3.4615465799967446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,3.1955947875976562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,0.8956053256988525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,0.9033973217010498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,0.8219786485036215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,3.4646825790405273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,3.4705867767333984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,3.2000481287638345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,0.9045546849568685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,0.9151893456776937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,0.8354559739430746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,3.4777119954427085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,3.4874401092529297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,3.213535944620768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.5521119832992554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.5670986572901408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,1.884346644083659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,0.532693346341451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,1.8980213801066081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,1.7514079411824544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.5026613473892212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.5061226685841879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,1.8149654070536296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.4659413496653239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,1.6836907068888347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,1.819994608561198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.5048319896062216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,1.8198026021321614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.5098880132039388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.46954667568206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,1.8262292544047039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,1.687162717183431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.5092853307723999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.5151093403498331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,1.8256905873616536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.4739946524302165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,1.6926560401916504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,1.8323146502176921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.3797599871953328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.38016533851623535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,1.0762453079223633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.3538186550140381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,1.0764533678690593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,0.9946026802062988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.37776001294453937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.378661314646403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,1.0777973333994548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.35340265432993573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,1.067797342936198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,0.9925813674926758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.37768534819285077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,1.069642702738444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.3774720033009847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.3532586495081584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,1.0707306861877441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,0.9955999851226807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.37805867195129395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.37965333461761475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,1.070805311203003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.3553119897842407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,1.0716479619344075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,0.9958613713582357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,2.6491732597351074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,2.668309211730957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,2.4199679692586265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,9.033173243204752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,8.3166135152181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,9.033023834228516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,2.671567916870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,2.693061192830404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,2.4428159395853677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,9.044309616088867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,8.332890828450521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,9.070069630940756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,2.7042773564656577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,2.7267465591430664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,2.4887520472208657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,9.105119705200195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,8.386005401611328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,9.123589197794596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,1.5239146550496419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,1.5576160748799641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,4.777210553487142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,1.4437546730041504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,4.421680132548015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,4.80894406636556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,1.3446720441182454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,1.3570613861083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,4.552805264790853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,4.561893463134766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,4.1960798899332685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,1.3511306444803874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,4.567040125528972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,1.364810625712077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,1.2366666793823242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,4.581173261006673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,4.207258542378743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,1.368127981821696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,4.591877301534017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,1.3819252649943035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,1.2555893262227376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,4.606858571370442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,4.226256052652995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,0.7997600237528483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,2.4519947369893393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,0.8188800017038981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,0.7623679637908936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,2.471813360850016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,2.2747252782185874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,0.7160960038503011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,2.3456106185913086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,0.7201759815216064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,0.6583093404769897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,2.350250720977783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,2.1657066345214844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,0.71888534228007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,2.352133274078369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,0.7262346744537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,0.6632266839345297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,2.358544031778971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,2.1706080436706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,0.7277493476867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,2.364960034688314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,0.7342987060546875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,0.6719573338826498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,2.372213363647461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,2.180842717488607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.44707731405893963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,1.2976213296254475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.45746131738026935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.428821325302124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,1.309664011001587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,1.2095946470896404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.40491731961568195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,1.2452106475830078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.40856532255808514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.37573333581288654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,1.2482133706410725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,1.1549759705861409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.4073919852574666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,1.2491626739501953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.4106026490529378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.37814398606618244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,1.2529706954956055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,1.1583786805470784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.41251198450724286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,1.2594506740570068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.41517865657806396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.3847200075785319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,1.2593279679616292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,1.1619199911753337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.31016000111897785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,0.7598400115966797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.3096746603647868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.2892213265101115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,0.7605760097503662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,0.7029813130696615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.3083359996477763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,0.7524800300598145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.3079093297322591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.28597333033879596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,0.7520480155944824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,0.6992853482564291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.3086400032043457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,0.7526240348815918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.3081546624501546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.28915200630823773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,0.7526453336079916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,0.7003413041432699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.3091040054957072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,0.7550880114237467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.30798399448394775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.2877546747525533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,0.7542293071746826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,0.7013333638509115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,1.9612693786621094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,5.463733037312825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,1.9789600372314453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,1.7872106234232585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,5.478335698445638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,5.027279853820801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,1.9743733406066895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,5.484997431437175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,1.9926506678263347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,1.8054773012797039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,5.501893361409505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,5.045946756998698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,1.9987146059672039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,5.520949045817058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,2.0196320215861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,1.8367679913838704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,5.540063858032227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,5.077856063842773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.1431946754455566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,2.9417174657185874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.1677333513895671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,1.0818719863891602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,2.966250737508138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,2.7246932983398438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,1.0121013323465984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,2.7817068099975586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,1.0199999809265137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,0.9229066371917725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,2.7903146743774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,2.563701311747233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,1.0171840190887451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,2.7918453216552734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,1.0265493392944336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,0.9299626350402832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,2.800741195678711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,2.568666617075602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,1.0299839973449707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,2.809856096903483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,1.0404000282287598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,0.9461493492126465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,2.8201119105021157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,2.5845227241516113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.6047199964523315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,1.5227573712666829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.618229349454244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,0.5760533412297567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,1.5370079676310222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,1.4159199396769206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.5412106513977051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,1.4448906580607097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.5467626651128134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,0.499941349029541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,1.4493813514709473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,1.3346187273661296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.5437173446019491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,1.449295997619629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.5505066712697347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,0.502560019493103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,1.4553653399149578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,1.338213284810384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.5504106680552164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,1.4596373240152996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.5563786824544271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,0.510101318359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,1.465178648630778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,1.346768061319987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.34059735139211017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,0.8171679973602295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.3490026791890462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.32778133948644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,0.8263519605000814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,0.7639626661936442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.30485333998998004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,0.7756160100301107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.309717337290446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.28799466292063397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,0.7787573337554932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,0.7212533156077067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.3081173300743103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,0.7795573075612386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.3123413324356079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.2897653381029765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,0.7831733226776123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,0.7264906565348307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.3137386639912923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,0.78548264503479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.3162933389345805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.2932159900665283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,0.7888906796773275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,0.7296586831410726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.23452800512313843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.4941279888153076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.2363199989000956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.2217973272005717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.4944693247477214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.45784000555674237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.23195199171702066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.4862613280614217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.23219732443491617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.21896533171335855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.4882346789042155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.45456000169118244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.23291732867558798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.48866132895151776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.2322559952735901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.21786133448282877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.4886080026626587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.45481598377227783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.2321760058403015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.4897119998931885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.23257599274317423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.2181653380393982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.49027733008066815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.45560534795125324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,2.6064319610595703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,5.538096110026042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,2.6233867009480796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,2.3763252894083657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,5.559189478556315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,5.097344080607097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,2.6267360051472983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,5.5679677327473955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,2.6462772687276206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,2.40066130956014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,5.587408065795898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,5.1229813893636065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,2.6645919481913247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,5.616117477416992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,2.682821273803711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,2.4433439572652182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,5.635152180989583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,5.1643679936726885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,1.4855893452962239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,3.004261334737142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,1.5135146776835124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,1.4028053283691406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,3.0328801472981772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,2.7794453303019204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,1.305450677871704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,2.784917195638021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,1.3177653153737385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,1.1902613639831543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,2.796442667643229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,2.5576213200887046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,1.3122933705647786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,1.3258453210194905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,2.795109430948893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,1.199455976486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,2.8073813120524087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,2.5706772804260254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,1.3400853474934895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,1.3430612881978352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,2.818309466044108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,1.21670929590861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,2.8313280741373696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,2.5870186487833657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,0.7657120227813721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,0.78383469581604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,1.5380533536275227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,0.7274933656056722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,1.5562879244486492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,1.4268959363301594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,0.6794506708780924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,0.6977653503417969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,1.4305386543273926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,0.6225333213806152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,1.4370932579040527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,1.3193653424580891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,0.6831680138905843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,0.6914079984029134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,1.4379359881083171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,0.6279360055923462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,1.449120044708252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,1.326858679453532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,0.6917280356089274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,1.4458506902058919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,0.6999786694844564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,0.6364533503850301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,1.453114668528239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,1.33460267384847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.4092746575673421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,0.8063680330912272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.4198773304621379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.3906666835149129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,0.825055996576945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,0.7538560231526693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.36450668176015216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.3682933251063029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,0.7536853154500326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.33904000123341876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,0.756335973739624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,0.6984426975250244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.36897599697113037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,0.7564640045166016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.37093865871429443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.3409973382949829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,0.7590933640797933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,0.717199961344401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.37223998705546063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.37566399574279785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,0.7633333206176758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.34699201583862305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,0.7687093416849772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,0.707589308420817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.23313599824905396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.44226133823394775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.24381333589553833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.22503467400868735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.44811733563741046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.4183146556218465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.20497600237528482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.20729066928227743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.4124480088551839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.19578667481740317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.41422398885091144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.38759998480478924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.20865066846211752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.414352019627889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.20988800128300986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.19769599040349325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.4150293270746867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.3883519967397054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.2115573287010193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.21379733085632324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.41767998536427814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.2018773357073466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.41975998878479004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.3942026694615682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.16453333695729574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.1647040049235026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.2815093398094177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.1543839971224467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.2812959949175517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.26098666588465375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.16244799892107645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.16173866391181946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.277130663394928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.2772266666094462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.2591200073560079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.1628426710764567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.2759946584701538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.1625706652800242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.15060266852378845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.2773173252741496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.25887467463811237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.16235199570655823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.16270933548609415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.2784480055173238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.15198399623235068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.2773226698239644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.2592693368593852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,1.930890719095866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,3.491135915120443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,1.9473867416381836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,1.7560853958129883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,3.502959887186686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,3.208677291870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,1.9439199765523274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,1.9597172737121582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,3.5090719858805337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,1.7747413317362468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,3.5254027048746743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,3.2231839497884116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,1.9727253913879395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,1.9861386617024739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,3.5363521575927734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,1.8053332964579265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,3.55406920115153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,3.2560907999674478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.114240010579427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,1.9211626052856445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.1367626984914143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,1.0527733167012532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,1.9433706601460774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,0.9825066725413004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,1.7873652776082356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,1.7689013481140137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,0.9970346291859945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,0.8950719833374023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,1.7766027450561523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,1.6272746721903484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,1.0072853565216064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,0.9979466597239176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,1.7778933842976887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,0.901850700378418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,1.7868107159932454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,1.6328800519307454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,1.0013386408487956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,1.0123039881388347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,1.794650713602702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,0.9165120124816895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,1.8070665995279949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,1.649845282236735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.5796159903208414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.5919520060221354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,0.9908906618754069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,0.5494399865468343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,1.0045706431070964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,0.9256906509399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.5143200159072876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.51801598072052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,0.9146239757537842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,0.47174398104349774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,0.9195679823557535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,0.8465706507364908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.5157653490702311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.5220746596654257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,0.9195146560668945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,0.4842720031738281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,0.9253333409627279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,0.848586638768514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.5234453280766805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,0.9291573365529379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.5294560194015503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,0.4824426571528117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,0.9344267050425211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,0.8586880366007487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.3110613425572713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.31932266553243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.5271360079447428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.2980213363965352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.5346719821294149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.4943893353144328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.27647467454274494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.4861973524093628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.27960532903671265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.25909332434336346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.4980533520380656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.4538506666819255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.27795199553171795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.2813546657562256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.48786667982737225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.2606559991836548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.49210135142008465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.45741331577301025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.2837120095888774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.28596266110738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.4930986563364665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.26545600096384686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.498149315516154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.4599200089772542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.17898666858673096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.18243199586868286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.2946186661720276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.17325333754221597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.3000906705856323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.2855733235677083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.15638933579126993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.2785973350207011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.15108799934387207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.2732906738917033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.2531840006510417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.1581653356552124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.27345067262649536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.16074132919311523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.27510400613149005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.2557973265647888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.15981333454449972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.2739413380622864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.16133333245913187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.2778186599413554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.26051199436187744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.12571199735005698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.19317332903544107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.1239413321018219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.11880000432332356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.19324799378712973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.18189332882563272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.12287466724713643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.19155200322469076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.1222773293654124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.11758933464686076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.19202667474746704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.18000000715255737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.12398399909337361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.19142399231592813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.12281066179275513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.11591999729474385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.19124799966812134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.17964265743891397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.12359467148780823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.19132266441980997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.121888001759847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.11758400003115337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.1921493411064148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.17945599555969238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,2.6239306131998696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,3.8372214635213218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,2.617413361867269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,2.331045309702555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,3.8302132288614907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,3.467914581298828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,2.634069283803304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,3.84934933980306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,2.6381492614746094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,2.360741297403971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,3.852378527323405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,3.49287478129069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,2.7334559758504233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,3.953520139058431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,2.7221546173095703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,2.3961119651794434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,3.9424638748168945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,3.530714670817057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,1.4699412981669109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,2.102442741394043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,1.491418679555257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,1.3844587008158367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,2.12554136912028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,1.9613812764485676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,1.288325309753418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,1.89739195505778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.2978346347808838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,1.1694186528523762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,1.9080746968587239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,1.739967981974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,1.2967466513315837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,1.9084906578063965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,1.3065120379130046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,1.180399974187215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,1.921573321024577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,1.7528479894002278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,1.3148746490478516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,1.923477331797282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,1.325984001159668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,1.1990880171457927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,1.9415946006774902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,1.7725067138671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,0.7499946753184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,1.0714720090230305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,0.7634826501210531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,0.7077439626057943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,1.0861813227335613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,1.0022613207499187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,0.6615093151728312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,0.9713173707326254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,0.6668960253397623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,0.6035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,0.9776480197906494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,0.8950613339742025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,0.6661653518676758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,0.9774613380432129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,0.6722880204518636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,0.6092426776885986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,0.983130693435669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,0.8998346328735352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,0.6752853393554688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,0.986245314280192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,0.681114673614502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,0.6169493198394775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,0.9935039679209391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,0.9105386734008789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.39290134112040204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.5594079891840616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.4022879997889201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.3740853468577067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.5685386657714844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.526032010714213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.34614400068918866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.505791982014974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.34815998872121173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.3216213385264079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.5091840028762817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.47205865383148193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.34946131706237793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.5087413390477499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.3521866798400879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,0.812549352645874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.3235573371251424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.5140800078709921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.47573331991831463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.35522135098775226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.3594133456548055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.5132160186767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.328821341196696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.521343986193339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.21500267585118613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.4794400135676066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.30216532945632935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.2185493310292562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.20573333899180093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.30713067452112836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.2874240080515544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.18549867471059164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.2720479965209961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.18674665689468384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.1773279905319214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.2731039921442668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.25893867015838623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.18756266434987387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.27373333772023517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.1893600026766459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.17822933197021484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.27322133382161456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.25916266441345215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.19127466281255087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.2773066759109497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.19368533293406168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.181658665339152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.278383990128835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.26290667057037354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.12519466876983643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.17322667439778647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.12651733557383218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.12136000394821167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.17523199319839478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.1689280072848002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.10974933703740437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.15902400016784668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.11154133081436157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.1020853320757548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.1597813367843628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.1520960032939911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.16863999764124551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.11268267035484314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.10429867108662923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.16152532895406088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.14869866768519083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.11131200194358826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.1623360017935435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.11334932843844096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.10481599966684978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.1625386675198873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.15044266978899637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.08891200025876363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.11995200316111247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.08877866466840108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.08306666711966197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.11988799770673116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.11274666587511699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.0890933374563853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.08896000186602275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.12113599975903828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.08467732866605122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.1213653286298116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.11146666606267293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.08900800347328186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.12016000350316365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.11981333295504253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.11116266250610352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.08897599577903748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.11978666981061299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.11942933003107707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,1.9346933364868164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,2.527008056640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,1.9421547253926594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,1.7387199401855469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,2.532144069671631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,2.3012266159057617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,1.947482744852702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,2.5411413510640464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,1.9530933698018391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,1.7542932828267415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,2.544645309448242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,2.3139146169026694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,2.00652805964152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,2.6041599909464517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,1.9831733703613281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,1.7851039568583171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,2.5775839487711587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,2.344384034474691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.1063733100891113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,1.4177759488423665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.120970646540324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,1.0389173030853271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,1.4362293879191081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,1.325376033782959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,0.9673600196838379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.2675893306732178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,0.9757760365804037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,0.8784213066101074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.2747626304626465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,1.160922686258952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,0.9729653199513754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.2735520203908284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,0.9833493232727051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,0.8863360087076823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.2810293038686116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.1704533100128174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,0.9877493381500244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.2904106775919597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,0.9974133173624674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,0.901360034942627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.297749360402425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.183888037999471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.5657973289489746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,0.7279679775238037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.5782346725463867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,0.5360053380330404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,0.7391306559244791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,0.6817599932352701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.4982453187306722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.6516160170237223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.5023893515268961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.4562133153279622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.6553386847178141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.6016480127970377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.5013386805852255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.6566346486409506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.5080373287200928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,0.4607573350270589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.6615093151728312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.6050986846288046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.5083946784337362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.6626453399658203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.5136906703313192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,0.4682133197784424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.6693867047627767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.6120800177256266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.2983146707216899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.3843413194020589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.30636799335479736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.28998400767644245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.3985653320948283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.36139198144276935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.2589919964472453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.33927468458811444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.26311999559402466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.24314133326212564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.34559468428293866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.31987200180689496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.26265066862106323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.343669335047404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.2663466731707255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.24634667237599692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.345360000928243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.32311467329661053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.2715839942296346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.35353068510691327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.27109867334365845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.24980266888936362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.351034681002299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.32663466533025104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.1634986698627472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.21069333950678507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.16871466239293417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.1591200033823649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.21541867653528848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.20048532883326212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.14084800084431967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.18597867091496786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.14310933152834573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.1304586629072825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.18675732612609863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.1734880010286967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.14180800318717957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.1882773240407308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.18903466065724692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.17521067460378012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.14402666687965393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.18874667088190714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.14595199624697366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.1393173336982727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.1920213301976522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.17998399337132773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.09282666444778442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.12166933218638103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.09635200103123982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.09485866626103719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.12360533078511556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.11947733163833618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.08509332935015361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.11151466766993205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.08549867073694865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.08096000055472057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.11381333072980244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.105621337890625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.0865760048230489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.11178132891654968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.0862506628036499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.11346667011578877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.10514666636784871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.08526933193206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.11225066582361858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.08684266606966655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.08155199885368347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.11381866534550984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.10703466335932414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.06849066913127899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.08852799733479817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.08816533287366231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.08294933537642162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.0686773310105006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.08699733018875122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.06834666430950165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.06509866813818614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.08896533648173015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.08311999837557475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.06861866513888042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.08748799562454224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.08783466617266338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.0830080012480418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.06832533578077953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.06850666801134746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.06429333488146464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.08823999762535095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,2.290815989176432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,2.6672372817993164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,2.2819573084513345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,2.229856014251709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,2.656277338663737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,2.5950133005777993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,2.301738739013672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,2.684901237487793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,2.2910292943318686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,2.2946507136027017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,2.6685972213745117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,2.6533919970194497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,2.3162666956583657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,2.698474566141764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,2.304229259490967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,2.285989284515381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,2.6812801361083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,2.6501866976420083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,1.2611467043558757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,1.4476319948832195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,1.2207893530527751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,1.2389813264211018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,1.426106611887614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,1.4301600456237793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.1558612982432048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,1.3463679949442546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.1536533037821453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,1.0835786660512288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,1.3437600135803223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.2892106374104817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.1553013324737549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,1.3491679827372234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.1567520300547283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,1.1325493653615315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,1.3434774080912273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,1.3236052989959717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.1637492974599202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,1.3592534065246582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.1635626951853435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,1.1036907037099202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,1.3543413480122883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,1.3042826652526855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,0.6326773166656494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,0.7384586334228516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,0.6212480068206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,0.6195573409398397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,0.7261066436767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,0.7179093360900879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.5890613396962484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,0.6831839879353842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.586954673131307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,0.5481599966684977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,0.683626651763916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.6414773464202881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.5900906721750895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,0.6880106925964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.5887360175450643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,0.5487680037816366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,0.6848639647165934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.6414666573206583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.6076533397038778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,0.6910133361816406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.5919413169225057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,0.5701760053634644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,0.6886879603068033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,0.6513386567433676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.33000000317891437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.3921653429667155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.3230186700820923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.3187573353449504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.3768053452173869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.36770133177439374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.3036160071690877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.35413865248362225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.30338666836420697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.2837013403574626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.35698668162027997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.3359573284784953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.3049013415972392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.363045334815979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.30506134033203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.28462400039037067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.3547999858856201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.333024005095164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.3081760009129842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.35971732934316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.3082080086072286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.28987733523050946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.35811734199523926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.33768534660339355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.1748159925142924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.20349333683649698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.17173866430918375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.17182934284210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.2018079956372579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.1971679925918579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.1588373382886251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.18738667170206705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.16081066926320395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.15215999881426492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.18754132588704428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.17688000202178955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.16012799739837646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.18737600247065225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.15987199544906616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.1523253321647644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.1880319913228353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.17894933621088663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.16326933105786642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.1890666683514913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.16197333733240762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.15641066431999207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.18959999084472656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.18141865730285645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.09790399670600891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.1165706713994344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.09667733311653137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.0990133285522461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.11512533823649089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.11415466666221619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.08980799714724223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.10504000385602315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.08900266885757446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.09962667028109233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.09010666608810425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.10551466544469197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.08949333429336548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.08463467160860698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.10637866457303365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.09942932923634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.09074667096138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.10684266686439514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.09032533566157024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.08308266599973042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.10656000177065532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.09994666775067647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.05653866628805796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.06642666459083557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.0647626668214798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.05445333321889242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.06464533507823944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.05435200035572052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.05040533343950907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.06155199805895487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,1.2270399729410808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.05431999762852987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.06588799754778545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.05532266696294149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.06472533444563548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.06433066725730896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.05148266752560934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.0644160012404124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.039173332353432976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.0410453329483668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.04730666677157084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.044495999813079834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.03846933444341024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.04523199796676636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.04316799839337667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,2.2249813079833984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,2.237658659617106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,2.1676905949910483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,2.215354601542155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,2.2306559880574546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,2.188192049662272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,2.237679958343506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,2.256490707397461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,2.217583974202474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,2.228874683380127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,2.2494826316833496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,2.242725372314453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,2.2537973721822104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,2.2648000717163086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,2.229152043660482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,2.205205281575521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,2.2715412775675454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,2.225050608317057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.2110986709594727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.2256800333658855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.1934186617533367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,1.1951893170674641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.2187946637471516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.2128266493479412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.1182719866434734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.1290826797485352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.114576021830241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,1.0824213027954102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.1242026487986247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,1.0687306722005208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.121029297510783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.1331520080566406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.1198453108469646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,1.0966239770253499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.128442684809367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.1143306891123455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.1304799715677898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.138917366663615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.1236266295115154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,1.0877546469370525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.1334933439890544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,1.1014026800791423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.6138879855473837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.6236693461736044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.606053352355957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,0.614031990369161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.6141279935836792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.61135466893514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.5693973302841187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.5713760058085123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.5667146841684977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,0.5426933368047079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.5719893376032511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.5384159882863363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.5707039833068848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.57478400071462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.5708693265914917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,0.5337333281834921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.5728319883346558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.54257599512736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.5741066535313925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.579093337059021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.5731733242670695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,0.5423893531163534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.578549345334371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.547327995300293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.32042133808135986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.3256213267644246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.3141546646753947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.31309332450230914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.31858134269714355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.3157973289489746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.2937866648038228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.2958933313687642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.2948746681213379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.275706668694814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.29708800713221234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.27910399436950684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.2953760027885437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.29798932870229083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.2961493333180745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.2770613431930542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.297760009765625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.27911466360092163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.29766400655110675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.3030400077501933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.2975359956423442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.2826186617215474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.3014400005340576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.1702293356259664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.28649065891901654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.17320533593495688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.16803733507792154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.16662399967511496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.16991466283798218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.16777600844701132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.15643733739852905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.1562026639779409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.15682133038838705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.15738133589426676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.1474506656328837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.15775466958681741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.15572266777356467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.15466133753458658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.1481066644191742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.15632533033688864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.14945066968599954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.157669335603714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.1612320045630137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.15849600235621134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.15230933825174967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.15914133191108704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.15278933445612589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.0953653355439504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.09736532966295879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.09488000472386678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.09456533193588257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.08694400389989217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.09855467081069946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.08717866738637288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.08900266885757446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.08827199538548787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.08188266555468242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.08712533116340637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.08742400010426839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.08730666836102803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.08178133269151051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.0885653297106425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.08205333352088928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.09025599559148152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.08808533350626628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.08898133039474487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.08148266871770223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.08931199709574382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.08360000451405843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.05454400181770325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.05733866492907206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.055029332637786865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.0539680023988088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.05300266544024149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.054757331808408104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.037178667883078255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.038346665600935616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.036730666955312095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.03827733298142751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.03817066550254822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,1.0348107020060222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,1.0148639678955078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,1.031002680460612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,0.9821333090464274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,1.0099146366119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,0.960693359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,1.0374879837036133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,1.0172106424967449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,1.0337279637654622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,1.0285600026448567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,1.0125493208567302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,1.009669303894043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,1.0516266822814941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,1.0323519706726074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,1.0445973078409831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,1.0093759695688884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,1.0261066754659016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,0.9918133417765299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.5749013423919678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.5651839971542358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.5629973411560059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,0.565829316775004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.551530679066976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.5552213191986084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.5248426596323649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.5152906576792399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.523909330368042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,0.49457065264383954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.5128693183263143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.4838240146636963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.5288906494776408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.5188426574071249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.5268106857935587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,0.5004160006841024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.5159680048624674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.48880000909169513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.5343626737594604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.5238773425420126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.532538652420044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,0.5061066548029581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.5220906734466553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.49382932980855304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.299018661181132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.29474133253097534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.29501332839330036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.2956906755765279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.28754132986068726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.2884640097618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.27241599559783936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.26770132780075073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.2736639976501465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.2567360003789266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.2674186627070109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.25169599056243896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.2738933364550273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.2683626612027486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.2735626697540283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.25783999760945636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.26955199241638184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.2524906595547994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.27821866671244305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.27332266171773273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.278714656829834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.264138658841451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.273637334505717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.2577386697133382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.16033066312472025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.15686933199564615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.15677332878112793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.1584213376045227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.15522666772206625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.15646933515866598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.14629333217938742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.1437440017859141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.1455680032571157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.13644267121950784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.14351999759674072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.1343946655591329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.14549332857131958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.14324266711870828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.14571199814478555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.13742933670679727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.14309333761533102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.1345866620540619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.1486133337020874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.14548266927401224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.1479520003000895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.14060266812642416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.14443733294804892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.09126399954160054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.09005332986513774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.0893440047899882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.09141332904497783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.08880533774693807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.0913706620534261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.08321600159009297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.08263466755549113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.07694933315118153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.07477866609891255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.08381332953770955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.08376000324885051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.07752533257007599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.08092266817887624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.07547733187675476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.0844533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.08427733182907104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.08390933275222778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.07854400078455608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.08171199758847554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.07693333427111308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.05198400219281515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.05109866460164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.05124266445636749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.0491839994986852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.04897066454092661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.04619200030962626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.04958933095137278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.04933333396911621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.04814933240413666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.03713600089152654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.03679466744263967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.03425599883000056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.03450666616360346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.03689600030581156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.03422933320204417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.025983999172846477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.024165332317352295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.024127999941507976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.5558826526006063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.5534400145212809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.5534506638844808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.533456007639567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.5526826779047648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.5325013399124146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.5563946564992269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.5562400023142496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.5557013352711996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.544704000155131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.555178682009379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.5434879859288534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.5629599889119467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5653013388315836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.5597866773605347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.5468426545461019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.5604960123697916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.5480053424835205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.3109653393427531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.3111413319905599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.3045813242594401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.3097440004348755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.3052000006039937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.3107680082321167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.2859039902687073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.2855573296546936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.2839786609013875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.27348800500233966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2848586638768514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.274671991666158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.2876266638437907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.28682132562001544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.2852746645609538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.2775040070215861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.2857973376909892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.27857067187627155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.29125867287317914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.29012266794840497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.28859732548395794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.282096008459727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.2893120050430298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.2824106613794963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.16416000326474509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.16345600287119547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.1627253293991089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.16699733336766562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.16247999668121338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.16570132970809937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.15096533298492432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.1516586641470591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.15211733182271323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.14467199643452963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.15222932895024618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.14536533753077188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.1529920001824697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.15307199954986572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.15130666891733804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.146506667137146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.15223999818166098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.14787200093269348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.15402133266131082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.15435733397801718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.15285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.14874133467674255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.15362667044003805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.15030933419863382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.09282132983207703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.09298666318257649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.09057600299517314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.09408533573150635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.09016533692677815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.0941973328590393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.08498666683832805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.08507733543713887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.07960000137488048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.08487466971079509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.08027733365694682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.08530132969220479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08493333061536153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.0846453309059143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.07991999884446462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.08463467160860698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.08017600079377492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.08733333150545756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.08686400453249614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.08688533306121826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.08302400012811025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08525333801905315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.08147733410199483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.05438933273156484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.053269331653912864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.05300266544024149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.05189866820971171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.04956800242265066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.0507893313964208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.05218133330345154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.05183466772238413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.04930666585763296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.05136533578236898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.03490666548411051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.034490667283535004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.03403199960788091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.03503466645876566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.025994665920734406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.02402666707833608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.025973332424958546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.018016000588734944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.3935840129852295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.3908906777699788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.3887253204981486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.3678239981333415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.389850656191508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.36825064818064374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.39164801438649494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.39209600289662677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.3894186814626058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.3721119960149129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.39081064860026044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.37269866466522217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.39718933900197345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.39657068252563477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.3927786747614543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.37466665108998615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.3949706554412842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.37463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.21583465735117593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.2153493364651998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.21371199687321982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.2116426626841227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.21382399400075278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.21207465728123984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.20408000548680624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.2035413384437561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.20385066668192545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.19171200195948282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.2039946715037028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.19130132595698038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.20338133970896402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.20335467656453451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.20310932397842407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.19292799631754556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.20463999112447104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.19356266657511392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.2065760095914205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.20623467365900675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.20430399974187216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.19572800397872925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.20588266849517822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.19779199361801147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.11784000198046367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.11833600203196208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.11641066273053487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.11727999647458394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.11732266346613567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.11823999881744385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.11197333534558614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.11154133081436157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.10991467038790385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.10258666674296062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.11004799604415894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.10287466645240784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.11152000228563945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.10971732934315999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.10308800141016643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.11058132847150166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.10392533739407857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.11290666460990906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.11346133550008138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.11189333597819011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.10406933228174846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.11117866635322571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.10450667142868042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.066170667608579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.0665280024210612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.06562133133411407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.06500266492366791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.06243200103441874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.06337599953015645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.06214933097362518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.062218666076660156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.060122668743133545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.04055999964475632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.0422986646493276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.04081066697835922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.03985599925120672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.028143999477227528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.0278613343834877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.02847466617822647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.02638400097688039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.022069332500298817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.022346665461858112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.02202133337656657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.31268266836802167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.3118879993756612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.30991466840108234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.28723732630411786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.3113866647084554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.2888159950574239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.3108160098393758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.309989333152771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.3099946578343709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.2896053393681844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.3096906741460164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.2899679938952128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.3115893403689067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.3123679955800374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.31069332361221313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.29145065943400067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.3113119999567668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.2921440005302429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.17071467638015747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.17086400588353476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.1688906749089559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.16504533092180887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.170250674088796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.16539200146993002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.1633386711279551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.1643786629041036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.1630453368028005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.15054933230082193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.16433067123095194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.15132266283035278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.1641866664091746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.16525333126386008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.16370133558909097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.15148799618085226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.16474666198094687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.15144532918930054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.16608533263206482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.16521066427230835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.16497600078582764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.15158399939537048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.1649440030256907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.1521440049012502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.09084266424179077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.09143466750780742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.09058666229248047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.08819199601809184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.09259200096130371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.08795199791590373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.08918399612108867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.08932266632715861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.08924266695976257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.08456533153851827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.08880533774693807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.08893866340319316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.08910933136940002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.0828906645377477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.08302933474381764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.0897706647713979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.08980799714724223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.08956799904505412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.08505599697430928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.0890773336092631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.08321600159009297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.05515199899673462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.05340800185998281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.054373333851496376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.05228800078233083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.051674668987592064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.05317866802215576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.05045333504676819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.05285866558551788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.052655999859174095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.054458667834599815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.05232533315817515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.2699679931004842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.2702346642812093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.27157866954803467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.25042666991551715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.2707039912541707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.24738667408625284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.26924800872802734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.26925865809122723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.2690773407618205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.2488106687863668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.2676159938176473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.24860799312591553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.2690239946047465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.2693866689999898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.2694026629130046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.24953067302703857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.2697333296140035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.24863467613855997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.14202666282653809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.1425279974937439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.14286399881045023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.13834133744239807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.14283733566602072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.13690132896105447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.14125333229700723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.14100266496340433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.14140266180038452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.1322773297627767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.14124799768129984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.1321386694908142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.14036267002423605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.14011200269063315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.14013866583506265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.13166933258374533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.14085867007573447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.1322826643784841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.14058666427930197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.14035200079282126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.14201066891352335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.13241599996884665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.1423360009988149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.13261866569519043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.0798880010843277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.08061866462230682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.0813973347345988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.0766186664501826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.07856533428033192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.08018133540948232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.07584000130494435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.07454399764537811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.08018133540948232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.08000533282756805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.07979199786980946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.07444266478220622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.0790826678276062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.07889066636562347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.08049599826335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.07626666625340779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.046623999873797096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.047482664386431374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.04603200157483419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.04643733302752177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.046298667788505554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.04714666803677877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.04819199939568838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.03086400032043457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.030810666580994923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.031013332307338715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.03028800090154012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.02378133436044057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.1585760017236074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,2.2430079778035483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,13.203146616617838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,2.2575732866923013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,2.0705493291219077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,13.223716735839844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,12.214458465576172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,2.2537760734558105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,13.231333414713541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,2.273098627726237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,2.0885225931803384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,13.250848134358725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,12.236340840657553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,2.2905492782592773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,13.307440439860025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,2.311567942301432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,2.128490606943766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,13.307557423909506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,12.275856018066406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,1.3129119873046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,6.900890350341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,1.3429813385009766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,1.2583359877268474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,6.932554880777995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,6.383333206176758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.1659680207570393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.08906666437784831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,6.699109395345052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.1765440305074055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,1.078938643137614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,6.72265625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,6.200464248657227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.179258664449056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.1858987013498943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,1.0906613667805989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,6.713578542073567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,6.7302398681640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.1910400390625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,6.20964241027832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.2018346786499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,1.1064480145772297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,6.743055979410808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,6.7522932688395185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,0.721893310546875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,6.231583913167317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,0.74562668800354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,3.5676854451497397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,0.6991626421610514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,3.5862293243408203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,3.310720125834147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.6530613501866659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.6579626798629761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,0.6094026565551758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,3.475205421447754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,3.4776906967163086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,3.2215894063313804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.6569653352101644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.663050651550293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,0.6143946647644043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,3.4810667037963867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,3.4862400690714517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,3.224954605102539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.6626826524734497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.6702240308125814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,0.6242453257242838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,3.4932800928751626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,3.4988800684611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.4931146701176961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,3.235722541809082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.4944213231404622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.46370665232340497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,1.9684534072875977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,1.8180853525797527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,1.968714714050293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.4923413197199504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.4938559929529826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,1.9560000101725261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.4644426504770915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,1.9558347066243489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,1.817471981048584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.4924480120340983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.4963839848836263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,1.9567947387695312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.46579734484354657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,1.8173920313517253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,1.957738717397054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.4925813277562459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.49517865975697833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,1.9595306714375813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.46450666586558026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,1.959546724955241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,1.820906639099121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,1.6611785888671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,1.677077293395996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,1.5317920049031575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,7.792266845703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,7.8020477294921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,7.210970560709636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,1.6721439361572266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,1.6881279945373535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,1.5475145975748699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,7.810810724894206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,7.828298568725586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,1.6979200045267742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,7.224965413411458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,1.7156532605489094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,1.5803626378377278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,7.85427729288737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,7.869429270426433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,0.9866507053375244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,7.252917607625325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,1.0083733399709065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,4.126309394836426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,0.9444213708241781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,4.149141311645508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,0.8779306411743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,3.8232854207356772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,0.885205348332723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,3.980559984842936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,0.8135253588358561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,3.988767941792806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,3.6882241566975913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,0.88373335202535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,0.8919466336568197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,0.8204106489817301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,3.9924745559692383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,3.9992640813191733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,3.695221265157064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,0.8947679996490479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,0.9054186344146729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,0.833306630452474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,4.011242548624675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,4.018320083618164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,3.709967931111654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.5460000038146973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.5602613290150961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,0.5292533238728842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,2.155183951059977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,2.168293317159017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,2.0050719579060874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.4948586622873942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.4994293451309204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.46407465140024823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,2.0866880416870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,2.0892640749613443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,1.9363892873128254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.5002400080362955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.5045973459879557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,2.0917760531107583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.46750934918721515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,1.9395947456359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,2.0954666137695312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.5038933356602987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.5106773376464844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,2.101151943206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.47388799985249835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,1.9469013214111328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,2.107093334197998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.3752266565958659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.37544532616933185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.2201600074768066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.3551520109176636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.2209386825561523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,1.1276960372924805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.37354131539662677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.37373332182566327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.2097333272298176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.3531413475672404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.2096532980600994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,1.1251839796702068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.3742560148239136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.3747733434041341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.2104533513387044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.3527093331019084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.210373322168986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,1.1253706614176433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.37432531515757245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.37492799758911133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.212719996770223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.353274663289388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.213818629582723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,1.1273226737976074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,1.3822773297627766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,1.39408540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,1.2734026908874512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,5.616613388061523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,5.624581019083659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,5.193951924641927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,1.3917706807454426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,1.405413309733073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,1.2836373647054036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,5.630959828694661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,5.638671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,5.203776041666667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,1.4099146525065105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,1.425653298695882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,1.3095893065134685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,5.661802927652995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,5.673648198445638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,0.8239733378092448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,5.230229377746582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,0.8432959715525309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,0.7898613611857096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,2.997157414754232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,3.017727851867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,2.780869483947754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,0.7339839935302734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,0.7414666811625162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,0.6809013684590658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,2.8809547424316406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,2.8905067443847656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,2.67140261332194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,0.7397706508636475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,2.892533302307129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,0.7464426358540853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,0.6859520276387533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,2.898720105489095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,0.7494026819864908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,2.6744213104248047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,0.7566080093383789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,0.6979733308156332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,2.9055732091267905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,2.9122934341430664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,2.688096046447754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.4575413465499878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.47003201643625897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.4455039898554484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,1.5775146484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,1.4704426129659016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,1.5875946680704753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.41442131996154785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.4188586473464966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,1.520250638326009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.3896426757176717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,1.524170716603597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,1.4143786430358887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.4179413318634033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.4225279887517293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,1.5259466171264648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.3924373388290405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,1.529743989308675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,1.4183146158854167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.42286932468414307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.4278026819229126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,1.5329599380493164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.39820265769958496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,1.5360159873962402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,1.4224425951639812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.3161333401997884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.3171413342158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,0.9084373315175375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.2998186747233073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,0.9100480079650879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,0.841749350229899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.3160799940427144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.31613866488138836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,0.9006506601969401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.2975520094235738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,0.9000373681386312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,0.8410507043202718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.314463992913564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.3163786729176839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,0.9009919961293539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.2978293299674988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,0.90228803952535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,0.8395520051320394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.31601067384084064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.315775990486145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,0.9024799664815267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.2969653407732646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,0.9025013446807861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,0.8457012971242269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,2.1763893763224282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,2.1936052640279136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,2.005018711090088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,7.494661331176758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,6.927551905314128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,7.50544548034668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,2.1930294036865234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,2.210752010345459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,2.0264639854431152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,7.514858881632487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,6.943978627522786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,7.537455876668294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,2.2307306925455728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,2.2486133575439453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,2.0670293172200522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,7.572495778401692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,6.987898508707683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,7.58732795715332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.2553866704305012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.283461332321167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,3.972378730773926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,1.1991840203603108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,3.68669859568278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,4.000389417012532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.1072266896565754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.1174879868825276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,3.7872158686319985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,1.0206293265024822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,3.5045385360717773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,3.7992159525553384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.1152693430582683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.124128023783366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,3.8030080795288086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,1.028762658437093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,3.5129334131876626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,3.8122078577677407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.1291306813557942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.1429440180460613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,1.0483253002166748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,3.824703852335612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,3.8410027821858725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,3.534426689147949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.6611306667327881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.6771039962768555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,2.0466079711914062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,0.6362026532491049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,2.0635093053181968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,1.9035785992940266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.5906986792882284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.5967733462651571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,1.9557493527730305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,0.5473599831263224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,1.9628160794576008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.5936746597290039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,1.9630506833394368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.600383996963501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,0.5531733433405558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,1.970021406809489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,1.8191946347554524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.5998613437016805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,1.9741439819335938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.6073919932047526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,0.5612159967422485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,1.9827146530151367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,1.8277759552001953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.36894933382670086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,1.088549296061198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.3781760136286418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.35922133922576904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,1.096010684967041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,1.0170559883117676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.33130667606989544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.3346506754557292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,1.0435893535614014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.3137066761652629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,1.0449386437733967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,0.9710453351338705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.3347040017445882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.33881600697835285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,1.045514663060506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.31621867418289185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,1.0493760108947754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,0.9755840301513672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.33869866530100506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.34293333689371747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,1.0548906326293945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.32019199927647907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,1.0586400032043457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,0.9776533444722494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.258730669816335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.6418720086415609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.25818665822347003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.24238399664560953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.6422826846440634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.5946400165557861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.2550719976425171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.25633599360783893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.6357973416646322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.24038400252660116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.6357226769129435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.5920426845550537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.25642667214075726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.6358186801274618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.2547360062599182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.24066134293874106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.635477344195048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.5944426854451498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.2561386624972026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.255295991897583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.6376959880193075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.2427039941151937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.6371839841206869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.5944426854451498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,1.6130293210347493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,1.6285759607950847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,4.541749318440755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,1.4853919347127278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,4.19163195292155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,4.556634585062663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,1.6246986389160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,1.6386079788208008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,4.560949325561523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,1.497221310933431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,4.575551986694336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,4.206122716267903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,1.6507412592569988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,1.6687733332316081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,4.591919898986816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,1.531786600748698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,4.608010609944661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,4.240874608357747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,0.9426133632659912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,0.9643039703369141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,2.4489332834879556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,0.9000373681386312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,2.4695626894632974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,2.273786703745524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,0.8343199888865153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,0.841327985127767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,2.315824031829834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,0.7696959972381592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,2.3240639368693032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,2.139029343922933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,0.8387839794158936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,0.8480213483174642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,2.32425594329834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,0.7760960261027018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,2.3336960474650064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,2.1481706301371255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,0.8587520122528076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,0.8591679732004801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,2.3405067125956216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,0.7893013159434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,2.349733352661133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,2.1568586031595864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.5002933343251547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.5137866735458374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,1.2717599868774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.48213334878285724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,1.2838079929351807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,1.1860533555348713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.44704000155131024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.4504106839497884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.2062186400095622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.41756268342336017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.2100693384806316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,1.1282613277435303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.4506666660308838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.45527466138203937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,1.210858662923177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.4207093318303426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.2177493572235107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,1.1242400010426838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.4557226498921712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.4612160126368205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.2205173174540203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.42599467436472577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.226474682490031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,1.1282986799875896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.28205867608388263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,0.68777068456014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.2884746591250102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.2769013245900472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,0.6944479942321777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.643392006556193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.25254400571187335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.25456533829371136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,0.6503893136978149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.23982399702072144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,0.6522239844004313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.6093546549479166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.2540266712506612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.2595679958661397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,0.6545706590016683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.24551467100779215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,0.656496008237203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.6111040115356445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.25917865832646686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.26208533843358356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,0.6600319941838583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.2465440034866333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,0.6633546749750773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.6177066564559937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.19949867328008017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.19986667235692343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.41914133230845135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.18702934185663858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.4196213483810425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.3882666826248169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.19527999560038248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.195306658744812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.41247467199961346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.18572266896565756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.4138186772664388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.38661332925160724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.19611199696858725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.19668267170588175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.4135253429412842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.18503467241923013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.4128640095392863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.38791465759277344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.1974453330039978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.1973386605580648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.4151253302892049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.18674665689468384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.41644267241160077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.38764798641204834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,2.146442731221517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,2.158730665842692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,4.5929921468098955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,1.971941312154134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,4.608538627624512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,4.244106610616048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,2.1597493489583335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,2.1751839319864907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,4.615866661071777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,1.9904266993204753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,4.6332213083903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,4.262144088745117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,2.2009973526000977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,4.664469401041667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,2.2160213788350425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,2.0328747431437173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,4.306591987609863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,4.682389259338379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.2242773373921711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,2.4929653803507485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.2483306725819905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,1.166874647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,2.5157492955525718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,2.3183040618896484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,1.0754133065541585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,1.0853813489278157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,2.3125759760538735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,0.9872480233510336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,2.3230667114257812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,2.133509318033854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,1.0925920009613037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,1.091274658838908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,2.3209546407063804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,0.9976960023244222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,2.3330933252970376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,2.1453545888264975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.0977173646291096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.1100107034047444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,2.344165325164795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,1.016266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,2.35588804880778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,2.161813259124756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.6324319839477539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.6477226813634237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,1.277728001276652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,0.6053706804911295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,1.2918186982472737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,1.1921706994374592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.5600586732228597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.194111982981364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.5661226511001587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,0.518725315729777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.1986506779988606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,1.1037866274515789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.5633493264516195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.569045344988505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.1978613535563152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,0.5242666800816854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.2044106324513753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,1.1096266905466716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.571616013844808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.2068479855855305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.5800480047861735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,0.5333866675694784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.214138666788737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,1.1178346474965413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.3378293514251709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.3471200068791707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,0.6748692989349365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.326474666595459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,0.6822773615519205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.6318720181783041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.2985546588897705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.30210665861765545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,0.6292800108591715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.28354666630427044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.6339146693547567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.5887360175450643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.30351465940475464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.3061866760253906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,0.6334559917449951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.28511999050776166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.6371466716130575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.5903466542561849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.30718932549158734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,0.6382613182067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.3123146692911784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.29020800193150836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.6436586777369181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.19513066609700522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.594485322634379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.19952533642450967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.37413867314656574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.1889280080795288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.37834668159484863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.3530240058898926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.17100266615549722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.3463519811630249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.17297599713007608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.16364799936612448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.3506773312886556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.3264159957567851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.17315733432769775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.17545066277186075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.34855465094248456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.16478400429089865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.35002132256825763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.33029333750406903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.17466133832931519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.17865600188573202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.35173332691192627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.17081065972646078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.3552480141321818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.33447468280792236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.1388106644153595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.2383520007133484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.13834666212399802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.13205333550771078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.2404693365097046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.2242506742477417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.13588266571362814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.23626667261123657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.13614400227864584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.12972266475359598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.23625600337982178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.22713599602381387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.13829333583513895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.23616532484690347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.13590932885805765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.12897066275278726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.2369920015335083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.22266666094462076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.13607999682426453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.13617600003878275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.23681066433588663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.12796266873677573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.2363413373629252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.22193066279093424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,1.5891307195027669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,2.8887411753336587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,1.6016799608866374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,1.4604159990946453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,2.904026667277018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,2.6695146560668945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,1.6150719324747722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,1.615066687266032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,2.9034719467163086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,1.4765067100524902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,2.9166294733683267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,2.6797707875569663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,1.6274186770121257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,1.6410080591837566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,2.9379040400187173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,1.5061759948730469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,2.9524478912353516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,2.7166080474853516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,0.9209493001302084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,0.9396106402079264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,1.595919926961263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,0.8761920134226481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,1.6133972803751628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,1.490671952565511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,0.8094186782836914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,0.8172907034556071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,1.4690186182657878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,0.7455946604410807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,1.477285385131836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,1.3584267298380535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,0.817530632019043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,0.8259039719899496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,1.4747360547383626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,0.7534346580505371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,1.4831466674804688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,1.3666240374247234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,0.8271306355794271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,0.8362399737040201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,1.4901493390401204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,0.767082691192627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,1.5004480679829915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,1.3798346519470215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.4798346757888794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.49081599712371826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,0.8262826601664225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.4595573345820109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,0.8373653093973795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,0.7750559647878011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.4235573212305705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,0.7615520159403483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.42739200592041016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.3938933213551839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,0.7655359903971354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,0.7097386519114176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.426800012588501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.43324800332387287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,0.7685333093007406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.39825065930684406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,0.7710239887237549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,0.7126560211181641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.4330720106760661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.4384320179621379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,0.7733759880065918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.4043360153834025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,0.7804533640543619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,0.720357338587443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.2591200073560079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.2653546730677287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.4416160186131795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.2510720094045003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.4479893445968628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.41710933049519855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.2262506683667501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.22826667626698813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.40572798252105713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.2164693276087443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.4078506628672282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.3829653263092041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.22992000977198282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.4089226722717285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.23229867219924927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.2189120054244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.41094934940338135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.3859359820683797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.23428267240524292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.4127519925435384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.2364693284034729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.22228266795476279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.4166133403778076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.15067733327547708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.3886559804280599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.25148266553878784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.1527839998404185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.14782933394114176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.2534293333689372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.23861332734425864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.13202133774757385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.23036799828211466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.13219199577967325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.12578133742014566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.2305333415667216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.21741867065429688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.13197333614031473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.232314666112264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.13411200046539307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.12795733412106833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.23400533199310303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.22018667062123617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.23244265715281168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.136053333679835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.130730668703715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.23659199476242065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.22219200929005942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.10681600371996562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.16455466548601785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.10618666807810466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.10280000170071919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.17044800519943237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.15877333283424377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.10727999607721965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.16453333695729574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.1074186662832896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.1644533375898997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.15530133247375488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.107424000898997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.1644159952799479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.10670933127403259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.10123733679453532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.1650826632976532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.15449600418408713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.10750933488210042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.16481066743532816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.10737066467603047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.10105599959691365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.16492266456286112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.15429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,2.125055948893229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,3.136176109313965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,2.1394294102986655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,1.936031977335612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,3.1451307932535806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,2.8815787633260093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,2.1586079597473145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,3.1678454081217446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,2.16377592086792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,1.9547252655029297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,3.172682762145996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,2.9006932576497397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,2.1843040784200034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,3.202202796936035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,2.200933297475179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,2.0016533533732095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,3.2171732584635415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,2.9482294718424478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.2092373371124268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,1.740063985188802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.2286400000254314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,1.1497493584950764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,1.7562719980875652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,1.6316213607788086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,1.058469295501709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,1.5700106620788574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,1.0685280164082844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,0.9709920088450114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,1.5780213673909504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,1.4494773546854656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,1.069642702738444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,1.5803839365641277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,1.078768014907837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,0.9809813499450684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,1.5888106028238933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,1.459546724955241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,1.0861066977183025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,1.601701259613037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.0949493249257405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,1.0003413359324138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,1.6094346046447754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,1.4792532920837402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.6194186607996622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,0.8895946343739828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.631717324256897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,0.5893280108769735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,0.9004906813303629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,0.836085319519043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.5441653331120809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,0.8052159945170084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.5496053298314413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,0.502618670463562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,0.8096586863199869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,0.7473493417104086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.5499360164006551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,0.8114079634348551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.555247982343038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,0.5072106520334879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,0.8151146570841471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,0.7512319882710775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.5574560165405273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,0.8209760189056396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.5655519962310791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,0.5183573166529337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,0.8269173304239908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,0.7614506880442301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.3249280055363973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.466810663541158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.33312533299128216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.3118879993756612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.473685344060262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.44008533159891766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.2839039961496989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.4209280014038086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.2876853346824646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.26895467440287274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.42443732420603436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.39530134201049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.28785600264867145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.4240533510843913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.2906026641527812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.27161065737406415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.42858131726582843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.3983519872029622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.293232003847758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.4318986733754476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.2976906696955363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.27727999289830524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.43476800123850506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.40304001172383624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.17899733781814575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.2539520064989726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.18317866325378418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.1750719944636027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.25894399483998615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.2431946595509847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.15478400389353433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.22673600912094116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.15638400117556253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.14804266889890036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.2304533322652181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.21578667561213175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.15441067020098367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.228112002213796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.15664000312487283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.1490079959233602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.23054933547973633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.21810666720072427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.15964800119400024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.23188267151514688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.1606559952100118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.15409066279729208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.23416000604629517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.22248532374699911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.10485333204269409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.14698666334152222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.10713066657384236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.10551466544469197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.15040533741315207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.14428266882896423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.09504000345865886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.13550399740537009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.09498133261998494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.08780266841252644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.13621866703033447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.12762666742006937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.09497066338857015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.13608533143997192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.0965226689974467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.08834667007128398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.13690132896105447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.12799466649691263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.10042132933934529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.1409333348274231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.09709333380063374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.09012266993522644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.13833600282669067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1306666632493337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.10367467006047566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.07813333471616109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.10301333665847778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.09693333506584167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.1034346620241801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.10300266742706299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.09743466973304749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.10332799951235454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.07813333471616109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.07239466905593872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.10301867127418518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.07834133505821228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.10192533334096272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.1029813289642334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.0979146659374237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,1.5832853317260742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,2.0701120694478354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,1.5912532806396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,1.4449599583943684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,2.0845707257588706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,1.9088053703308105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,1.5972639719645183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,2.090544064839681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,1.6057546933492024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,1.4620107014973958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,2.1009066899617515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,1.9249547322591145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,1.622528076171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,2.1178666750590005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,1.633087952931722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,1.4866132736206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,2.130714734395345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,1.952730655670166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,0.9103413422902426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.1744320392608643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,0.9262613455454508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,0.8649226824442545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.1884160041809082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,1.1033120155334473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,0.7967893282572428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,1.0456159909566243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,0.8045813242594401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,0.7323413689931234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,1.0547040303548176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,0.9682933489481608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,0.8042559623718262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,1.0556639830271404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,0.812058687210083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,0.7402400175730387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,1.0638720194498699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,0.9762559731801351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,0.8142666816711426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,1.068336009979248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,0.8239839871724447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,0.7534826596577963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,1.0775413513183594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,0.9899093310038248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.4691679875055949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.47861866156260174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.604853351910909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.4468799829483032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.6144320170084635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.5699679851531982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.4099839925765991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.4142293135325114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.5391199986139933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.3819253444671631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.5431253512700399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.5037759939829508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.4170986811319987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.5455733140309652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.4204213221867879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.3861066500345866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.5494079987208048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.5066719849904379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.4220000108083089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.5504693190256754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.4278666575749715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.39213331540425617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.5567893187204996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.5143733421961466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.24882133801778158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.32054932912190753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.2547893325487773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.2397706707318624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.3251413305600484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.3038986722628276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.213919997215271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.2172693411509196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.28200000524520874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.2051573395729065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.28545065720876056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.26876266797383624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.2169653375943502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.28408000866572064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.21985600392023721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.208079993724823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.2878986597061157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.2715146740277608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.22297066450119019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.2914186716079712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.22626133759816489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.21215466658274332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.29419199625651044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.27563732862472534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.1378720005353292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.17849600315093994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.14109866817792258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.1799573302268982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.13738666971524557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.11978666981061299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.17121599117914835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.12167466680208842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.15863999724388123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.11158933242162068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.1590559979279836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.1461013356844584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.12034133076667786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.15820800264676413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.12174399693806966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.11196800072987874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.16033066312472025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.14842666188875833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.12286933263142903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.16035733620325723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.12381866574287415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.11758400003115337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.16270933548609415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.15237333377202353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.08096000055472057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.10337066650390625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.10621866583824158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.07477333148320515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.09820800026257832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.07022933165232341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.09824533263842265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.09103467067082723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.0764213353395462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.09856533010800679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.07032533486684163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.09914666414260864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.09299199779828389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.07680533329645793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.07207466661930084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.09946667154630025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.09292800227801006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.062090665102005005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.07853333155314128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.07790400087833405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.07257066667079926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.06188266475995382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.0773226668437322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.07281066477298737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.061146666606267296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.0784853349129359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.05793066819508871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.07690666615962982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.07266133526961009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,1.8652693430582683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,2.1763200759887695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,1.862186590830485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,1.7706133524576824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,2.171232064565023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,2.0637386639912925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,1.8725172678629558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,2.186720053354899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,1.87009064356486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,1.7825867335001628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,2.178965409596761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,2.0868800481160483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,1.9494773546854656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,2.2539893786112466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,1.897871971130371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,1.8693386713663738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,2.2206239700317383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,2.1669333775838218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,1.036784013112386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.1995786825815837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,1.008687973022461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,1.0035359859466553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.1795360247294109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.1621493498484294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,0.9426080385843912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,1.098527987798055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,0.9408799807230631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,0.890725294748942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,1.1011253197987874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,1.043610652287801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,0.9485599994659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,1.12772798538208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,0.9455359776814779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,0.9007999897003174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,1.1058986981709797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,1.0519519646962483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,0.95469864209493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,1.1144320170084636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,0.9518346786499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,0.9318719704945883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,1.1115039984385173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,1.0796106656392415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.5289653142293295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.6176213423411051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.5254826545715332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,0.5086559851964315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.6060266494750977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.5904213190078735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.48019735018412274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.561466654141744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.4806133508682251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,0.4546186526616414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.5723146597544352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.5340373516082764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.4886453151702881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.5651893218358358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.4843146800994873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,0.4599466721216838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.5661653280258179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.5402400096257528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.48748799165089923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.5781600077946981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.4904693365097046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,0.4646933476130168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.5675573348999023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.5413013299306234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.27171732981999713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.3198453386624654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.2698720097541809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.271781325340271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.31514134009679157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.30819199482599896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.2500053246815999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.29125332832336426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.2495573361714681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.23764799038569132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.29393066962560016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.27962666749954224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.25167999664942425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.29422932863235474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.25335999329884845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.2424479921658834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.2953173319498698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.2839786609013875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.25410133600234985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.29716267188390094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.2531680067380269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.24329600731531778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.29791466395060223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.2848320007324219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.14873600006103516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.1742186745007833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.14656000336011252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.14613866806030273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.1723680098851522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.16933866341908774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.13408000270525613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.15693333745002747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.1339413324991862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.12718400359153748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.15877866744995117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.15056000153223673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.1353653371334076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.15968533356984457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.13526399930318198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.12943466504414877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.158160001039505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.1520799994468689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.13617600003878275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.16078399618466696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.1357919971148173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.15550933281580606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.08337066570917766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.09787733356157939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.0817440003156662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.08360000451405843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.09547199805577596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.09654933214187622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.0764160007238388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.09030933181444804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.07610133290290833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.0710453341404597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.08888000249862671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.08365333080291748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.07673599819342296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.09065066774686177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.07530666887760162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.07062933345635732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.08918399612108867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.08378666639328003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.09082667032877605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.07720000048478444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.09004799524943034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.08503466844558716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.0537013312180837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.048063998421033226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.05446400245030721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.05449066559473673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.048245335618654885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.05596266686916351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.053210665782292686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.04880533119042715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.05426133175690969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.05217599868774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.03498666733503342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.03402133285999298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.03833599885304769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,1.8082292874654133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,1.8169493675231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,1.8034133911132812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,1.741498629252116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,1.813968022664388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,1.752629280090332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,1.8140959739685059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,1.8284746805826824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,1.8404906590779622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,1.74019193649292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,1.8217333157857258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,1.7982880274454753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,1.8783733050028484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,1.9530879656473796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,1.856405258178711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,1.817951997121175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,1.876970609029134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,1.8398987452189128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,0.9981866677602133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,1.0122559865315754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,0.9797866344451904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,0.9768586953481039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,0.9951573212941488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,0.9907093048095703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,0.9310346444447836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,0.918287992477417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,0.9099360307057699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,0.8635040124257406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,0.917301336924235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,0.8726080258687338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,0.9149813652038574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,0.9233706792195638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,0.9142293135325114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,0.8813920021057129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,0.9208906491597494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,0.8812906742095947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,0.9249280293782552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,0.9324586391448975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,0.9242560068766276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,0.895418643951416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,0.9313866297403971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,0.909600019454956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.5126133362452189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.5204746723175049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.5009546677271525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,0.4962186813354492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.5106026728947958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.5035519997278849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.4659680128097534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.4695520003636678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.4644320011138916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.4411733150482178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.4689600070317586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.44756801923116046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.46881600220998126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.4721333185831706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.46933865547180176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.445029338200887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.4726879994074504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.45264001687367755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.47222399711608887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.4767040014266968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.47095998128255206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.44994131724039715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.4755359888076782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.4562400182088216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.2653013269106547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.2700106700261434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.25945067405700684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.2590826749801636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.26548266410827637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.2637919982274373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.24269866943359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.24472532669703165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.24215465784072876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.23094399770100912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.2444960077603658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.2332800030708313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.24495999018351236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.24663466215133667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.24478934208552042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.23597866296768188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.24658666054407755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.2376906673113505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.24663466215133667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.249616007010142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.24623999993006387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.2360373338063558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.2486506700515747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.23891733090082803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.14442132910092673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.14732799927393594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.14307733376820883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.1437120040257772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.14415466785430908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.14450666308403015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.12948266665140787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.13010133306185404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.13011733690897623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.12433600425720215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.13079999883969626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.13131200273831686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.13205333550771078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.13103999694188437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.12601066629091898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.13216533263524374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.12803733348846436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.13185066978136697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.1320266624291738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.1325440009435018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.12942933042844137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.13432000080744425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.13010666767756143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.07989866534868877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.07994133234024048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.08157333234945933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.0788320004940033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.07348266740640004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.07047999898592631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.07339199880758922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.06960000097751617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.0758186678091685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.07499733567237854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.07448533177375793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.07506133119265239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.07496533294518788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.0739519993464152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.06957866748174031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.04797333478927612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.048250665267308555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.04646400113900503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.04432533184687296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.04665599763393402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.04615999758243561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.04448533554871877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.04750399788220724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.03485333422819773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.03416533271471659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,0.8357706864674886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,0.818282683690389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,0.8351199626922607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,0.7826826572418213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,0.8151146570841471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,0.7689119974772135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,0.841205358505249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,0.8251787026723226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,0.8392586708068848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,0.7943039735158285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,0.8230079809824625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,0.7770293553670248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,0.850757360458374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,0.8340426286061605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,0.8459306557973226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,0.8333120346069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,0.8308640321095785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,0.8203466733296713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.473904013633728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.46540268262227374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.4637226661046346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.4596800009409587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.4556266864140828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.45163198312123615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.4238293170928955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.41573333740234375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.42353065808614093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.4006346861521403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.4154133399327596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.3936053514480591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.42971734205881756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.42078932126363117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.4275733232498169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.4054826498031616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.4201279878616333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.39900267124176025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.4323466618855794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.42453332742055255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.43133334318796795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.4116693337758382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.4229280153910319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.40410133202870685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.2448213299115499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.2414026657740275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.24039999643961588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.24061334133148193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.237226665019989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.23678400119145712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.2199946641921997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.21618666251500449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.22099733352661133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.20996799071629843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.21583465735117593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.20482667287190756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.22218134005864462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.2182826598485311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.22341332832972208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.21410665909449259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.21951999266942343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.21066667636235556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.22484799226125082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.2202826738357544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.22407466173171997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.2156160076459249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.2210879921913147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.21151467164357504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.13426132996877035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.1318773329257965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.1328480045000712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.1341546674569448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.13077867031097412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.1325279970963796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.11981333295504253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.11728533109029134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.12006933490435283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.11358400185902913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.11154133081436157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.12122666835784912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.11889066298802693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.12069333593050639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.11542399724324544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.1181813379128774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.1136853297551473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.12172266840934753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.11964799960454305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.12168533603350322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.11737066507339478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.11981333295504253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.11565333604812622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.07649066547552745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.07413866619269054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.07602666815121968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.07690666615962982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.07558933397134145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.07064533233642578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.06868800024191539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.06938666601975758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.06506666541099548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.06901866694291432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.07050133248170216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.06937066713968913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.06804266571998596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.06354133288065593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.07017600039641063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.07060799996058147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.06853333115577698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.04576533536116282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.04498666524887085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.044495999813079834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.044677332043647766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.04429866870244344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.04243200023969015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.043178667624791466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.043738668163617454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.032111999889214836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.03181333343187968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.03194666653871536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.4508479833602905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.4515146811803182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.4506346782048543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.4329440196355184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.4508746862411499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.43246932824452716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.45586665471394855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.45653867721557617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.45502932866414386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.4408533175786336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.45469868183135986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.4413599967956543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.46163201332092285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.4625493288040161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.456773320833842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.4488319953282674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.45905065536499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.44642666975657147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.25712533791859943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.25707733631134033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.2528160015741984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.2566293279329936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.2539680004119873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.25835732618967694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.23426665862401327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.23441066344579062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.2334453264872233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.22353067000706991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.23195199171702066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.22434133291244507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.2364906668663025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.23737066984176636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.2357813318570455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.23015467325846353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.23638399442036948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.23164800802866617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.23938133319218954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.23874666293462118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.23813333113988241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.23281600077946982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.2375040054321289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.2327573299407959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.13948800166447958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.13948800166447958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.13666133085886636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.1361066699028015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.13969600200653076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.12467199563980103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.12478933731714885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.124208003282547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.11755733688672383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.12366400162378947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.11780800422032674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.12654933333396912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.12640000383059183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.1269973317782084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.12164266904195149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.12523733576138815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.12025599678357442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.12643733620643616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.12634133299191794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.1274133324623108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.12427733341852824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.12575466434160867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.12377599875132243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.07645866771539052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.07658133407433827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.07958399752775829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.07442666590213776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.07036266724268596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.06959466636180878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.0666293352842331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.07036800185839336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.0666293352842331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.07023466626803081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.07061333457628886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.06683200101057689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.07043733199437459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.06650666892528534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.07045866549015045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.07039466500282288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.06857066849867503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.07042133311430614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.06670933465162913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.0423573354880015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.04312000175317129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.04247466723124186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.043141335248947144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.04251199960708618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.04493333399295807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.04477333525816599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.04461866617202759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.031925333042939506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.033146666983763375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.029690665503342945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.0242399995525678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.024688000480333965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.32434133688608807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.3239946762720744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.32208534081776935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.3040320078531901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.32172266642252606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,1.8151200612386067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.30348799626032513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.32657066980997723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.32525867223739624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.3256160020828247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.3099840084711711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.3267093300819397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.30934399366378784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.3297920028368632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.32733867565790814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.32868266105651855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.3097813328107198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.3287786642710368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.3107146620750427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.18314667542775473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.18330667416254678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.18117332458496094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.17903467019399008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.18099733193715414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.1792800029118856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.17017066478729248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.1690666675567627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.17071467638015747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.15873066584269205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.17081065972646078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.15877333283424377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.17101866006851196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.17085866133371988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.17114132642745972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.16126933693885803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.1720106601715088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.16077333688735962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.1727466583251953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.17081065972646078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.17309333880742392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.16353066762288412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.17307732502619425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.09712533156077068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.1634666621685028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.09723200400670369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.09732266267140706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.09745066364606221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.09115733702977498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.0925600032011668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.09185066819190979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.08709333340326945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.09089066584904988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.0909493366877238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.09298666318257649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.09167466560999553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.08620267113049825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.09120532870292664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.08654933174451192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.09073600172996521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.09285866220792134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.09116799632708232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.08678932984670003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.09092266360918681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.0869706670443217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.05422399938106537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.0555626650651296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.0539680023988088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.05429866909980774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.054671997825304665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.05202133456865946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.05187733471393585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.05322133501370748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.05421866476535797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.05217066903909048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.05406400064627329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.05415999889373779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.05364799996217092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.035829332967599235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.03613866617282232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.03506666670242945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.026719999810059864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.02611733227968216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.0258240004380544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.2616479992866516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.2614133358001709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.26235200961430866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.2407039999961853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.2592960000038147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.24074665705362955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.26103999217351276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.26180799802144367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.2624533375104268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.24458134174346924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.2607626716295878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.24234133958816528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.2717439929644267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.27457066377003986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.262442668279012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.24546666940053305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.2613653341929118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.2449386715888977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.14220266540845236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.14239466190338135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.14305599530537924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.13827199737230936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.1420746644337972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.1379200021425883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.13797332843144736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.13772799571355185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.1276586651802063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.13700266679128012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.12780800461769104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.14049599568049112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.12643200159072876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.1363200048605601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.12769599755605063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.13808533549308777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.13796266913414001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.13798933227856955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.12800533572832742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.13693867127100626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.12801600495974222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.07663466533025105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.0726986676454544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.0744053324063619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.0746613343556722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.0759200006723404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.07445866862932841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.07072533170382182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.07444266478220622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.07457600037256877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.07640533149242401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.07612800101439159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.07593066493670146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.07524266839027405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.07673066854476929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.07236800094445546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.07455466687679291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.04750399788220724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.04418133199214935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.04615999758243561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.0323786661028862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.22594666481018066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.22661866744359335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.2262666622797648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.20987733205159506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.22628267606099448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.21076265970865884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.22641066710154215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.22734934091567993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.22642133633295694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.2098026672999064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.22649067640304565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.21173334121704102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.22733867168426514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.22618667284647623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.22421334187189737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.2098026672999064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.23724265893300375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.2118826707204183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.12198399504025777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.12172266840934753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.12159466743469238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.11537599563598633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.1153600017229716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.11985599994659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.11966933806737264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.12006400028864543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.11174933115641277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.11914666493733723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.1135040024916331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.11980799833933513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.11970133582750957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.11955733100573222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.113237331310908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.11982933680216472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.11313066879908244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.11980266372362773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.1207360029220581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.11959466338157654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.1143839955329895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.12098667025566101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.11346667011578877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.06763199965159099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.06814399858315785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.06815466781457265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.06400000055631001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.06372266511122386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.06665066878000896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.06654933094978333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.06628266473611195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.06433600187301636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.06640000144640605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.06849599877993266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.06819200019041698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.04077333211898804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.04038399954636892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.042463997999827065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.0447626660267512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.0421013335386912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.028917332490285236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.029098667204380035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.022853332261244457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.022656001150608063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.022853332261244457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,1.7522773742675781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,10.547840118408203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,1.766325314839681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,1.6367146174112956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,10.560272216796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,9.778341293334961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,1.7745013236999512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,10.570271809895834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,1.7871732711791992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,1.6604587237040203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,10.587024052937826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.13590400417645773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,9.801589330037435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,1.7864160537719727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,1.8013653755187988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,1.6798720359802246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,10.59396235148112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,10.611157099405924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,1.8273706436157227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,9.826053619384766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,1.840010643005371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,1.7239467302958171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,10.647136052449545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,10.665840148925781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,1.041536013285319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,9.86850102742513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,1.0645546913146973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,5.531898498535156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,1.011130650838216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,5.133701324462891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,5.555280049641927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,0.9199573198954264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,0.9287253220876058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,0.8701120217641195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,5.368341445922852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,5.394549051920573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,4.98085339864095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,0.9238346417744955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,0.9326079686482748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,0.8678826491038004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,5.378122965494792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,5.3853333791097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,0.932090679804484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,4.988165219624837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,0.9426720142364502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,0.8779359658559164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,5.3865706125895185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,5.398981094360352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,0.9472959836324056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,4.998474756876628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,0.9581066767374674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,0.8969173431396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,5.410751978556315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,5.422431945800781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.5737066666285197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,5.015738805135091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.5902080138524374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,2.8708267211914062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,0.5679200092951456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,2.886074701944987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,2.667941411336263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.5164959828058878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.5214986801147461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,0.4899199803670247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,2.7939465840657554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,2.7979307174682617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,2.5971627235412598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.5189333359400431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,2.7995786666870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.523690660794576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,0.4936586618423462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,2.80240535736084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.5228853225708008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,2.601674715677897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.5287306706110636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,2.803130785624186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,0.5026026566823324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,2.8111305236816406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,2.604581356048584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.529210646947225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.5358293453852335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,0.5061866839726766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,2.8141600290934243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,2.8203999201456704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,2.6154826482137046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.39663465817769367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.396448016166687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,1.5961012840270996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.37622400124867755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,1.5968106587727864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,1.4795360565185547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.3940639893213908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.39445332686106366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,1.5845333735148113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.3755413293838501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,1.5836906433105469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,1.4761919975280762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.394159992535909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.39416531721750897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,1.583936055501302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.37538135051727295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,1.5859039624532063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,1.4770347277323406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.3963786760965983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.39616533120473224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,1.5869226455688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.3781333367029826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,1.586176077524821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,1.480826695760091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.3962666591008504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.3960640033086141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.3775786558787028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,1.588746706644694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,1.4773972829182942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,1.587173302968343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,1.3081440130869548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,1.319216012954712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,1.2172640164693196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,6.22549311319987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,6.235466639200847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,5.776538848876953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,1.315621296564738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,1.327562650044759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,1.2321813106536865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,6.234629313151042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,6.248522440592448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,5.789562861124675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,1.3285813331604004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,1.340336004892985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,1.2481760183970134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,6.251946767171224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,6.26470947265625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,1.3502507209777832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,5.806298573811849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,1.366389274597168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,1.2787306308746338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,6.2883148193359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,6.300346374511719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,0.783023993174235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,5.836714426676433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,0.801530679066976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,0.7602240244547526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,3.30789852142334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,3.3267253239949546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,3.0764268239339194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,0.6929813226064047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,0.6995840072631836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,0.6526186863581339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,3.1944427490234375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,3.1975412368774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,2.966479937235514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,0.6973866621653239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,0.7045226891835531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,0.6554986635843912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,3.1960906982421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,3.2019678751627603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,2.972186724344889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,0.7026613553365072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,0.7099253336588541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,0.6638826529184977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,3.2029012044270835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,3.212640126546224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,2.9771785736083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,0.7152213255564371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,0.7240959803263346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,0.6758453051249186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,3.2259521484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,3.231541315714518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,2.9941012064615884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.4374133348464966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.4486986796061198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,1.7378560702006023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.4288693269093831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,1.7491200764973958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,1.618303934733073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.39400001366933185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.3961333433787028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.3734453519185384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,1.6799413363138835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,1.6846240361531575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,1.5664480527242024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.39632534980773926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.39827199776967365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,1.6826772689819336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.3787733316421509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,1.5657386779785156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,1.6863892873128254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.4023626645406087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.4033600091934204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,1.6879946390787761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.37985066572825116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,1.691482702891032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,1.5730986595153809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.4049813350041707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.4087626536687215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,1.6961119969685872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.3856053352355957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,1.7011466026306152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,1.5797492663065593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.08317866424719493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.30380799372990924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.30422399441401166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.28914133707682294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,0.991050640741984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.30190932750701904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,0.9939040342966715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,0.9275893370310465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.3020266691843669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.28780800104141235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,0.9818452994028727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.3009439905484517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,0.9843893051147461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,0.9179466565450033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.303439994653066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.2865226666132609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,0.9818452994028727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.30180267492930096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,0.9844533602396647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,0.9246506690979004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.30352532863616943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.2884640097618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,0.9840373198191324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.3039146661758423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,0.9857119719187418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,0.9188533624013265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.3028533260027568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.2874400019645691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,0.9881760279337565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,0.9186773300170898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,0.9876159826914469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,1.0893386999766033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,1.1011733214060466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,1.0144106547037761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,4.486202557881673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,1.0959253311157227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,4.491488138834636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,4.162975947062175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.1366026401519775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,4.49454402923584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,4.500560124715169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.1054133574167888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,4.173733393351237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.1168906688690186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,1.0363252957661946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,4.505087852478027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,4.519280115763347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,4.182485262552897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.1246613661448162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.1372640132904053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,1.060655991236369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,4.531056086222331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,4.548783938090007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,4.206576029459636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.6541013320287069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.6699679692586263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,0.636026660601298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,2.403674602508545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,2.2399892807006836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,2.421610673268636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.5792586803436279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.5849599838256836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,2.311194737752279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,0.5447253386179606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,2.1492106119791665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,2.3169652620951333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.5828479925791422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.5883093277613322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,2.314154624938965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,0.5493013461430868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,2.321296056111654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,2.156165281931559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.5870186487833658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.5935306549072266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,0.5555253426233927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,2.3194665908813477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,2.3291519482930503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,2.1592480341593423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.597103993097941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.6047146717707316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,0.5669920047124227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,2.3336373964945474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,2.3443573315938315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,2.1705333391825357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.3654826482137044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.37506667772928876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,1.2742400169372559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.35816001892089844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,1.2819360097249348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,1.1927093664805095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.32663466533025104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.32897599538167316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,1.2258986632029216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.31227733691533405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,1.2282240390777588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.32797332604726154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,1.2272213300069172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.3309653401374817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.31431466341018677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,1.2313653628031414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,1.1488107045491536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.33259199062983197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,1.2320640087127686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.33482134342193604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.31726400057474774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,1.2351146539052327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,1.1513280073801677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.33746667702992755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,1.2398453553517659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.34096535046895343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.3225333293279012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,1.245029369990031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,1.1582187016805012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.2548746665318807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,0.7401119867960612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.2547360062599182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.24221332867940268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,0.7419892946879069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,0.690661350886027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.252400000890096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,0.7376266320546468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.25306665897369385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.2424586613972982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,0.7349173227945963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,0.6871466636657715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.25285865863164264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,0.7384853363037109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.2550666729609172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.24245333671569824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,0.7342186768849691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,0.6861386299133301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.25274133682250977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,0.7342560291290283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.25403199593226117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.24140799045562744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,0.7356746991475424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,0.6875306765238444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.2528266708056132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,0.7349706490834554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.25310399134953815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.242576003074646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,0.7419520219167074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,0.6882239977518717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,1.7008694012959797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,5.959423700968425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,1.7146453857421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,1.5832266807556152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,5.973685582478841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,5.529946645100911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,1.7232534090677898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,5.988208134969075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,1.7371946970621746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,1.6097760200500488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,5.999664306640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,5.5566450754801435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,1.741312026977539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,6.014597574869792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,1.7543039321899414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,1.6319786707560222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,6.028026580810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,5.579290390014648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,1.7753440539042156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,6.059994379679362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,1.7903200785319011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,1.672613302866618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,6.071658452351888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,5.621711730957031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,0.994698683420817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,3.181584040323893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,1.0173280239105225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,0.9607199827829996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,3.202042579650879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,2.961941401163737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,0.8729226589202881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,3.0282932917277017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,0.8801706631978353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,0.8127466837565104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,3.0360320409139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,2.8116372426350913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,0.8771519660949707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,3.0354881286621094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,0.8857119878133138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,0.8202239672342936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,3.04203192392985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,2.8173653284708657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,0.8853813012441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,3.0463520685831704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,0.894757350285848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,0.8309973080952963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,3.054976145426432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,2.827690760294596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,0.899786631266276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,3.0674613316853843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,0.9106773535410563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,0.8480693499247233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,3.0772692362467446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,2.8462400436401367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.5273760159810384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,1.6453065872192383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.5389066537221273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,0.5115946531295776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,1.6565653483072917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,1.5355572700500488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.4655413230260213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,1.5700480143229167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.4703306754430135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.43937067190806073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,1.5742826461791992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,1.462725321451823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.46881600220998126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,1.5738612810770671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.47357332706451416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.4432213306427002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,1.5780320167541504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,1.4665066401163738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.47222399711608887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,1.5791412989298503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.47856001059214276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.4487626552581787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,1.585898717244466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,1.4721973737080891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.4797866741816203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,1.5901014010111492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.4869493246078491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.45769067605336505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,1.5963093439737956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,1.4799787203470867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.2961653272310893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,0.8802186648050944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.3043253421783447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.2924373348553975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,0.8882613182067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,0.8281546433766683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.2635519901911418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,0.8407573699951172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.26629332701365155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.25408534208933514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,0.8453173637390137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,0.78876264890035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.26519999901453656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,0.8425386746724447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.26920000712076825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.25674132506052655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,0.8464639981587728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,0.7910666465759277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.2691626747449239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,0.8549546400705973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.27250667413075763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.2589919964472453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,0.8497280279795328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,0.793018658955892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.2732479969660441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,0.8612266381581625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.27803732951482135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.2633066574732463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,0.8561973571777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,0.7992479801177979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.5260106722513834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.2076693375905355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.1977120041847229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.5269546508789062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.4889599879582723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.2031946579615275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.5223946571350098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.2035306692123413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.19542400042215982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.520309329032898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.48869868119557697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.2043466567993164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.5196586847305298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.2044693430264791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.19553599754969278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.519157330195109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.48812798659006756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.20452266931533813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.5217119852701823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.20506133635838827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.19591999053955078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.5220319827397665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.4884106715520223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.20414932568868002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.5212213198343912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.20455465714136759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.19556800524393717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.5231680075327555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.48877867062886554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,1.271008014678955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,3.6143627166748047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,1.281546672185262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,1.1793440183003743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,3.627061208089193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,3.353877385457357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,1.2779253323872883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,3.6258773803710938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,1.290058692296346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,1.193936030069987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,3.6392800013224282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,3.3704586029052734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,1.2923040390014648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,3.6448532740275064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,1.302085320154826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,1.2098720073699951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,3.656810760498047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,3.384144147237142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,1.3152426878611247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,3.6756321589152017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,1.328826665878296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,1.2408959865570068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,3.6899360020955405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,3.4155521392822266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,0.7492160002390543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,1.9596586227416992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,0.7655680179595947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,0.7237599690755209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,1.9758826891581218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,1.8306399981180828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,0.658026655515035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,1.8497333526611328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,0.6627093156178793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,0.6151253382364908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,1.8557119369506836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,1.7200586001078289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,0.662389318148295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,1.855669339497884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,0.6685120264689127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,0.6208853324254354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,1.8608160018920898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,1.7255627314249675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,0.6671413580576578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,1.8631893793741863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,0.6751786867777506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,0.6272639830907186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,1.8695893287658691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,1.7324639956156414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,0.6792106628417969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,1.8781280517578125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,0.6872373421986898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,0.6410719950993856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,1.885690689086914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,1.7454773585001628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.40009601910909015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,1.0234346389770508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.41019733746846515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.39002664883931476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,1.0324959754943848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,0.9600533644358317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.3534506559371948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,0.9673866430918375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.3574666579564412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.33671998977661133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,0.9709013303120931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,0.9050026734670004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.3563946485519409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,0.9713013172149658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.35999464988708496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.33925334612528485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,0.9810933272043864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,0.9064693450927734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.36095468203226727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,0.9760746955871582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.3649226824442546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.3424533208211263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,0.9802133242289225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,0.9124106566111246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.3654613494873047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,0.9835946559906006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.3712853193283081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.3511199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,0.9896960258483887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,0.9201707045237223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.22853867212931314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.5594506661097208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.23518933852513632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.22576000293095908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.5642720063527426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.5275733470916748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.2019253373146057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.5279093186060587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.19569599628448486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.5283146699269613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.49456532796223956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.20320000251134238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.5334719816843668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.2060906688372294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.1943733294804891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.5299946864446005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.49673600991566974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.2046133279800415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.5314453442891439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.2076373298962911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.19957866271336874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.5331466595331827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.5004693269729614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.2096853256225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.5375093221664429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.21236266692479452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.20374399423599243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.5396000146865845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.5063733259836832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.16269333163897196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.34574933846791583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.16265066464742026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.15457066893577576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.3449440002441406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.32255999247233075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.15896000464757284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.3410293261210124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.1602826714515686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.15256533026695251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.34066665172576904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.3203679919242859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.15850133697191873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.34094401200612384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.1527253290017446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.3407946825027466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.3197439908981323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.15853866934776306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.34087467193603516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.16061866283416748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.1525920033454895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.342682679494222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.3209226727485657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.16075199842453003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.3428693215052287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.16038933396339417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.15239466230074564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.3412426710128784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.3203199903170268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,1.6773440043131511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,1.691322644551595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,3.6319093704223633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,1.5586719512939453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,3.6494452158610025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,3.3801492055257163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,1.7035147349039714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,1.710271994272868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,3.6616907119750977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,1.5832799275716145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,3.674501419067383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,3.4058399200439453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,1.7149333953857422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,1.7291733423868816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,3.6835947036743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,1.606826623280843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,3.696650822957357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,3.4291626612345376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,1.7528266906738281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,1.7632907231648762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,3.724986712137858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,1.6474347114562988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,3.472474733988444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,3.7382453282674155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,0.9713013172149658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,1.9823840459187825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,0.9904800256093343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,0.9360266526540121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,2.0020106633504233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,1.8614346186319988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,0.8549439907073975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,0.8548213640848795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,1.8409120241800945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,0.7897173563639323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,1.8501332600911458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,1.7103840510050456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,0.8529120286305746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,1.8480745951334636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,0.8612106641133627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,0.7966773509979248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,1.8556159337361653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,0.8614506721496582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,1.7186293601989746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,1.8554080327351887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,0.86899733543396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,0.804533322652181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,1.8615093231201172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,1.7274986902872722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,0.8770826657613119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,0.8984906673431396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,1.875472068786621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,0.8228747049967448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,1.8872480392456055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,1.74455992380778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.5047626495361328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.516490658124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,1.0199893315633137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,0.4885813395182292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,1.0337279637654622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,0.961855967839559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.4436266819636027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.44678401947021484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,0.9511520067850748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.41673068205515545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,0.9548213481903076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,0.8896000385284424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.4452213446299235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.44949865341186523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,0.9546186923980713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.4208693504333496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,0.9604746500651041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,0.8930293718973795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.44948800404866535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,0.9617493152618408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.4540319840113322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.4246559937795003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,0.985322634379069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,0.8981653054555258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.4572213490804036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.46380265553792316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,0.9697973728179932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.441269318262736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,0.9762346744537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,0.9062773386637369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.27215999364852905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.2797066569328308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.5426079829533895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.26579199234644574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.5501866738001505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.513919989267985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.23836799462636313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.2391306757926941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.5045813322067261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.23321600755055746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.5069653193155924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.4880533218383789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.23842666546503702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.2412053346633911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.5055359999338785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.22985599438349405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.5071573257446289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.4782506624857585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.24145066738128662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.24438933531443277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.5110773245493571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.23325333992640176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.5134506622950236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.48209599653879803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.24767466386159262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.25066133340199787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.5167306661605835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.23828800519307455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.5189173221588135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.4904160102208455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.15516266226768494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.15974400440851846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.30543466409047443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.1549066702524821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.30857600768407184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.2917813261349996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.13607466220855713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.13822933038075766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.28282666206359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.13008000453313193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.2834879954655965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.2672106623649597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.13609066605567932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.13621866703033447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.2836640079816182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.12999999523162842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.2855199972788493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.26710400978724164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.13822399576505026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.13924266894658408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.2857653299967448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.1323199967543284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.2856266697247823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.26928534110387164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.13876799742380777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.286682665348053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.14223999778429666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.1397546629110972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.2898293336232503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.274890661239624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.1113866666952769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.19738133748372397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.11189333597819011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.10944533348083496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.19765333334604898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.1874879995981852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.11143466830253601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.1956640084584554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.11131733655929565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.1055573324362437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.1974239945411682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.18592000007629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.19565866390864053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.11343466242154439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.1058079997698466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.1976319948832194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.18544000387191772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.11225066582361858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.19751467307408652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.1970133384068807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.18550399939219156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.11127466956774394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.19568532705307007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.11163199941317241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.10668800274531047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.185098667939504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,1.2524906794230144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,2.2888266245524087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.2620480060577393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,1.1614293257395427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,2.30077330271403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,2.1285012563069663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,1.2626880009969075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,1.273408015569051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,2.305936018625895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,1.1828587055206299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,2.3160160382588706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,2.1472907066345215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,1.2973919709523518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,1.2827413082122803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,2.3178772926330566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,1.2009173234303792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,2.33133331934611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,2.164789358774821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,1.2995253403981526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,1.313530683517456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,2.3448053995768228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,1.2318987051645915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,2.359402656555176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,2.193317254384359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,0.7324426968892416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,0.7465386390686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.2729439735412598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,0.7063360214233398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.2879199981689453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.2025226751963298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,0.6389760176340739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.649727980295817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,1.1699360211690266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,0.5984640121459961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.1758560339609783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,1.090272029240926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,0.6431253353754679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,0.6493386824925741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.1762080192565918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,0.6010560194651285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.1785813172658284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,1.0929546356201172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,0.6502559979756674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,0.6577279965082804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.1815520127614338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,0.6090133190155029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.188479979832967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,1.1024266878763835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,0.6619786818822225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,0.6698400179545084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.193727970123291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,0.6234933137893677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.2023839950561523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,1.1177600224812825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.38262399037679035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.3922453324000041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,0.6635200182596842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.37196266651153564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,0.6726933320363363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.6342293421427408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.33482666810353595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.3380693197250366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.6094826857248942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.31648532549540204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.6114399830500284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.5727680126825968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.3359573284784953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.6165226697921753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.34085333347320557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.32039467493693036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.6141706705093384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.5760159889856974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.34511999289194745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.3471413453420003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.6152106523513794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.32552532354990643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.6187466780344645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.5801546573638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.3478240172068278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.6231733163197836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.3531573216120402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.32977066437403363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.6279893318812052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.5910986661911011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.20965333779652914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.3574506839116414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.2137599984804789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.20592532555262247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.36166401704152423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.3450026512145996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.18104000886281332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.1829599936803182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.327237327893575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.17493865887324014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.3285226623217265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.3099786639213562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.18172266085942587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.326474666595459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.18514132499694824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.17701866229375204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.33078400293986004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.31252266963322956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.18369066715240479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.1868106722831726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.329312006632487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.17900800704956055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.3322453300158183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.31492799520492554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.1893706719080607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.19128533204396567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.3366560141245524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.18399999539057413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.3386400143305461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.319706658522288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.12164800365765889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.12467733025550842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.20353599389394125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.12184533476829529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.20678400993347168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.19951466719309488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.10788266857465108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.1893600026766459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.10115733742713928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.1906720002492269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.1776640017827352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.107424000898997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.18945600589116415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.10760533809661865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.10319466392199199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.19126399358113608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.17885865767796835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.10789333780606587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.18926399946212769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.10311999917030334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.19163199265797934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.17892267306645712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.10898133118947347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.19137599070866904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.11173333724339803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.10749866565068562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.19329599539438883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.1835093299547831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.08737599849700928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.08746666709582011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.13700800140698752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.08520533641179402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.13780267039934793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.1301813324292501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.08695466319719951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.0869653324286143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.1362933317820231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.13801599542299905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.12970667084058127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.08711999654769897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.13808000087738037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.08668266733487447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.08488000432650249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.13619732856750488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.12961066762606302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.08675733208656311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.13731732964515686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.08720533053080241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.08457600076993306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.13598933815956116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.1299199958642324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.0870293378829956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.13661866386731467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.08716799815495808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.08474133412043254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.13613866766293845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.1296266714731852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,1.675279935201009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,2.481621265411377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,1.6810773213704426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,1.5621760686238606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,2.490901311238607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,2.29532798131307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,1.7147679328918457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,2.505237261454264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,1.703488032023112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,1.553834597269694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,2.5105652809143066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,2.3071786562601724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,1.7293814023335774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,2.5396745999654136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,1.7239999771118164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,1.5754879315694172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,2.532858689626058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,2.3342347145080566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,1.7646667162577312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,1.7574346860249836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,2.581381320953369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,1.6134452819824219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,2.5710934003194175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,0.9634400208791097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,2.372447967529297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,0.9777493476867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,1.3879893620808919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,0.9265440305074056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,1.402575969696045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,1.3174506823221843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,0.8358773390452067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,0.8411839803059896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.2431146303812664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,0.777125358581543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.2508906523386638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,1.17084797223409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,0.8415679931640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.250480016072591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,0.785535971323649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.2594880263010662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,1.1699893474578857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,0.8511306444803873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.2603519757588704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,0.8591840267181396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,0.7950507005055746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.2665013472239177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,1.18012801806132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,0.866645336151123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.2784960269927979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,0.8745600382486979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,0.8132853507995605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.2885493437449138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.1968692938486736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.4938186804453532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,0.7123200098673502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.5031573375066122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.476911981900533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,0.7289386590321859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,0.6767199834187826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.42956801255544025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.6401439905166626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.4327733516693115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.40300798416137695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.6435786485671997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.601312001546224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.43351999918619794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.6432533264160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.4375893274943034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.4087306658426921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.6479200124740601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.6069440046946207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.43851200739542645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,0.6610186497370402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.4416693449020386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.41155731678009033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.6536266803741455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.6114240090052286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.4474080006281535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,0.6590186754862467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.45343999067942303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.42104001839955646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,0.6644320090611776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.6195093393325806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.2614399989446004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.3816479841868083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.2725600004196167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.25413332382837933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.38039998213450116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.35993067423502606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.22557334105173746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.336736003557841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.22871466477711996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.2171306610107422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.337557315826416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.32119999329249066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.22603732347488403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.33694398403167725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.22853867212931314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.21831466754277548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.33904000123341876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.32265599568684894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.23042132457097372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.3417919874191284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.23312532901763916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.22192533810933432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.3431626558303833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.3251306613286336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.235152006149292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.34748268127441406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.23891733090082803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.2262666622797648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.351146658261617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.33081066608428955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.14247467120488486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.20669333140055338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.14738667011260986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.14222400387128195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.20983999967575073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.19971734285354614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.1856213410695394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.12424533565839131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.11748799681663513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.18705066045125326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.1732213298479716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.12190399567286174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.18344000975290933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.12406933307647705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.11768000324567159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.1868799924850464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.1747573415438334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.1237600048383077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.18717867136001587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.12573867042859396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.12019200126330058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.18773333231608072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.1774133245150248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.12664000193277994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.1902880072593689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.12610133488972983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.19164266188939413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.18312533696492514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.08375466863314311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.12001599868138631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.0848640004793803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.12315199772516887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.12061333656311035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.07853866616884868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.0730453332265218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.11255466938018799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.10727999607721965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.07733866572380066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.11346133550008138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.07905066510041554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.11383466919263203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.11381333072980244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.07928533355395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.1137600044409434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.10723732908566792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.0787306676308314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.11433066924413045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.08073066671689351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.11583466331164043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.06256533165772755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.08713066577911377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.06237866481145223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.08902933200200398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.0888266662756602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.06251733501752217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.060496002435684204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.08310399949550629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.08711466193199158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.06232533355553945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.061039999127388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.08520533641179402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.06277866661548615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.08749333024024963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.06257066627343495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.06058666606744131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.08906132976214091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.08304533362388611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.0625493327776591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.08726933598518372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.06062399844328562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.253391981124878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,1.646527926127116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.2589066823323567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,1.1566293239593506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,1.6492373148600261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,1.5344692866007488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.2810826301574707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,1.674085299173991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.2846559683481853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,1.1755413214365642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,1.6766346295674641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,1.5507787068684895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.3133440017700195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,1.688576062520345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.2936053276062012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,1.187461296717326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,1.6874292691548665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,1.5630505879720051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,1.3197866280873616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,1.7194666862487793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.320741335550944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,1.21670397122701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,1.7186719576517742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,1.5919200579325359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,0.7306773662567139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,0.9416960080464681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,0.7514826456705729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,0.6984373728434244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,0.950106700261434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,0.8932480017344157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.633840004603068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,0.8310186862945557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.6355893214543661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,0.5866933266321818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,0.8354026476542155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,0.7778080304463705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.6376906633377075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,0.8402613004048666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.6436373392740885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,0.5948319832483927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,0.8438933690388998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,0.7863413492838541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.6442933479944865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,0.845957358678182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.6508959929148356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,0.6016480127970377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,0.8495573202768961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,0.7918986479441324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,0.6549919843673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,0.8574559688568115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,0.6645546754201254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,0.6145600080490112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,0.866101344426473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,0.8055146535237631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.3757226864496867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.4857493241628011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.3835093180338542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.36268266042073566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.49235733350118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.4629280169804891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.3242986599604289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.4291093349456787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.3288106719652812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.30665600299835205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.43989332516988117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.40694932142893475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.3273973266283671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.43044265111287433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.33162667353947956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.31144533554712933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.4336426655451457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.410591999689738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.33247466882069904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.43590935071309406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.334666649500529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.3144320050875346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.43999465306599933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.4143413305282593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.34065600236256915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.44542400042215985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.3441280126571655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.3223573366800944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.4484320084253947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.4209119876225789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.1996906598409017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.2601066629091899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.20641599098841348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.19620800018310547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.2634773254394531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.2504799962043762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.16947199900945029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.22459733486175537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.17128000656763712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.16385599970817566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.22642133633295694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.21754133701324463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.16963199774424234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.22696532805760702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.17317867279052734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.16684800386428833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.22881066799163818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.2182719906171163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.171941339969635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.22910932699839273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.17511467138926187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.1688213348388672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.23161067565282187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.2221119999885559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.17806933323542276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.2353066603342692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.18136000633239746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.175162672996521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.23777065674463907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.2267520030339559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.11002666751543681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.1423520048459371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.11351999640464783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.11173866192499797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.14614933729171753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.1411733329296112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.09614400068918864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.1279306709766388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.0983679989973704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.09149866302808125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.12955199678738913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.12006400028864543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.09664000074068706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.12873599926630655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.09937066833178203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.09119466940561931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.12795733412106833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.1218293309211731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.09742933511734009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.12974400321642557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.09910933176676433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.09289066990216573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.12986133495966592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.12325867017110188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.09986666838328044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.13062399625778198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.1018293301264445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.13182399670283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.12531733512878418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.08712533116340637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.06670400003592174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.08901333808898926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.08480532964070638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.060826669136683144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.08312533299128215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.06267733375231425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.08392000198364258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.07707199951012929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.06229866544405619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.06259733438491821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.07750399907430013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.062421331803003945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.08264000217119853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.06253866851329803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.06052266558011373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.0842080016930898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.07886933286984761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.08374933401743571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.060821334520975746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.07898666461308797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.05292266607284546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.06568000217278798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.06657599906126659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.06459733347098033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.0631573349237442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.05296533306439718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.06540266672770183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.05420266588528951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.052655999859174095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.06665066878000896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.0625439981619517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.053370664517084755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.0666133314371109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.05425066749254862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.052815998593966164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.06611733138561249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.054373333851496376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.050186668833096824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.06458666423956554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,1.5270719528198242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,1.7738986015319824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,1.5166773796081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,1.4652105967203777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,1.7638826370239258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,1.703701337178548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,1.537882645924886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,1.7837120691935222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,1.56769593556722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,1.4591466585795085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,1.7793386777242024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,1.6903146107991536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,1.6108640034993489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,1.825119972229004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,1.5650347073872883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,1.6846453348795574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,1.0249706904093425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,1.8087414105733235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,1.9213813145955403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,1.5656320254007976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,1.8086826006571453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,1.5324053764343262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,1.6651360193888347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,1.7763147354125977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,0.7999839782714844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,1.9058400789896648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,0.9388373692830404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,0.7829493681589762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,0.8198239803314209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,0.9228426615397135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,0.9422986507415771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,0.7723039786020914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,0.89955735206604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,0.7680959701538086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,0.7245173454284668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,0.8928319613138834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,0.8467040061950684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,0.7789920171101888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,0.9114720026652018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,0.7749333381652832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,0.7234666347503662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,0.90066130956014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,0.8453066349029541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,0.792517344156901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,0.9194560050964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,0.787285327911377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,0.828437328338623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,0.9152106444040934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,0.951317310333252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,0.7771626313527426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,0.9229599634806315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,0.7677973111470541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,0.8191680113474528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,0.8956960042317709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.40937598546346027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,0.9438239733378092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.48071467876434326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.4025546709696452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.41262932618459064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.4726666609446208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.4780160188674927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.3959253231684367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.3936320145924886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.46000532309214276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.3702720006306966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.4575626850128174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.4346453348795573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.4002133210500081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.4654879967371623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.39771731694539386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.37134401003519696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.46253331502278644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.43319467703501385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.4060639937718709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.4717173178990682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.40215468406677246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.40297067165374756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.4678293466567993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.4662880102793376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.39819733301798504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.46483198801676434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.3941813309987386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.4008106787999471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.45949868361155194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.4643733501434326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.21413866678873697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.25282132625579834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.21197867393493652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.21554666757583618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.2483839988708496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.2520800034205119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.20617600282033285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.24149332443873087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.20562134186426798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.19455466667811075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.2384106715520223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.22758400440216064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.24250133832295737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.20857600371042886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.20758932828903198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.19406932592391968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.2424266735712687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.22805333137512207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.2116640011469523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.24712532758712769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.21122666200002035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.20746666193008423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.2453546722730001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.2406239906946818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.20769067605336508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.24507200717926025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.20771199464797974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.20627733071645102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.24131733179092407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.11777599652608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.23860265811284384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.14015466968218485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.1158026655515035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.11920533577601115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.1388053297996521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.1383680005868276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.11120532949765523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.13182933131853738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.10481599966684978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.1311893363793691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.1236853301525116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.13184000054995218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.11161599556605022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.13385066390037537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.12356266379356384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.11339199542999268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.13382400075594583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.11338667074839275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.10989333192507426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.1341546674569448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.12985600034395853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.13380799690882364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.11050132910410564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.11160533626874287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.13199999928474426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.1304639975229899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,1.1455732981363933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.06437333424886067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.06646933158238728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.06950399776299794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.07734400033950806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.08067733546098073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.075013334552447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.07216533521811168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.06434133152167003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.0754613329966863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.0644053320089976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.07503999769687653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.07354666789372762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.07644266883532207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.06663999954859416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.06439466774463654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.07643199960390727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.07639466722806294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.06439466774463654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.07572799921035767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.05077333251635233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.050698667764663696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.05031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.050160000721613564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.041984001795450844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.05045866469542185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.041989331444104515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.04101333270470301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.050517335534095764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.05043200155099233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.03384533276160558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.03188266605138779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,1.4789279301961262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,1.4893652598063152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,1.4689013163248699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,1.4263893763224285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,1.439674695332845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,1.4760959943135579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,1.4986720085144043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,1.48908265431722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,1.4056533177693684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,1.4976693789164226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,1.4278186162312825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,1.5340639750162761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,1.5429439544677734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,1.517306645711263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,1.6382400194803874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,1.5274559656778972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,1.657050609588623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,1.5219732920328777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,1.5373867352803547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,1.492037296295166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,1.6184959411621094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,1.5030879974365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,1.6428906122843425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,0.7762453556060791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,0.7887787024180094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,0.7622453371683756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,0.7784480253855387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,0.7760533491770426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,0.7892159620920817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,0.750320037206014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,0.7545173168182373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,0.7463786602020264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,0.7028266588846842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,0.7513653437296549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,0.716421365737915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,0.756826639175415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,0.7636266549428304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,0.7540373007456461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,0.7022026379903158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,0.7588373025258383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,0.713813304901123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,0.7706826527913412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,0.7775733470916748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,0.7649706999460856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,0.806879997253418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,0.772874673207601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,0.8188959757486979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,0.7569333712259928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,0.7635040283203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,0.7464959621429443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,0.7961013317108154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,0.7547307014465332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,0.8106506665547689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.39897600809733075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.4060426553090413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.3914080063501994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.39851733048756915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.39737598101298016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.40510400136311847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.38657065232594806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.3890346686045329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.38220266501108807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.3585653305053711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.38575466473897296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.364896019299825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.38885335127512616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.39244266351064044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.38627199331919354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.3613599936167399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.3888373374938965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.3659253517786662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.3962399959564209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.3996053139368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.393829345703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.39213867982228595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.3960640033086141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.3954826593399048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.3883039951324463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.39317866166432697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.3829866647720337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.38927467664082843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.38730132579803467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.39580798149108887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.21000534296035767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.21332265933354697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.20579200983047485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.210533340771993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.20805867513020834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.21326400836308798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.20055999358495077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.20174400011698404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.2001439929008484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.18772266308466592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.20136533180872598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.18918399016062418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.20407466093699136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.20521599054336548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.20347734292348227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.19079999128977457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.20306134223937988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.19230933984120688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.2065920035044352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.20806399981180826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.20702399810155234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.20269334316253662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.20780799786249796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.2051466703414917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.20295466979344687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.20529067516326904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.20204265912373862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.2014240026473999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.2032853364944458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.20307733615239462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.1162506639957428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.1179093321164449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.11450133721033733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.11715733011563619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.11588799953460693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.10940800110499065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.10983467102050781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.10949333508809407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.1035093367099762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.109525332848231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.10292800267537434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.11199999849001567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.11032000184059143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.10333333412806193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.1111840009689331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.11334400375684102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.11560533444086711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.11159466703732808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.1104159951210022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.11225066582361858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.11097600062688191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.1104693313439687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.11133866508801778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.11091732978820801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.11096533139546712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.11108266313870747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.11140799522399902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.06542933483918507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.0666240006685257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.06509333352247874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.06678933401902516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.06857066849867503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.06237866481145223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.06286933521429698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.06066133578618368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.06257066627343495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.06055466830730438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.06266133487224579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.060559997955958046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.06438399851322174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.06247999767462412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.06273599962393443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.06238399942715963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.06251200040181477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.041264000038305916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.042949333786964417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.04110399881998698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.04090133309364319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.027690666417280834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.024256000916163128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.022074667116006214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.023754666248957317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.6998293399810791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.6871039867401123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.6948533058166504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,0.6527466773986816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.6801866690317789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.6430399815241495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.7086079915364584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.6969280242919922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.7036693096160889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,0.6575946807861328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.6886560122172037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.6460959911346436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.7227253119150797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.7088426748911539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.7170399824778239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,0.7507572968800863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.700650691986084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.7413120269775391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,0.7063626448313395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.6932373046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.6968746980031332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,0.7438720067342123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.6825386683146158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.7327466805775961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.3736746708552043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.36691733201344806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.3668266534805298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.3763146797815959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.3596746524175008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.3680906693140666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.35789334774017334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.3516746759414673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.35552533467610675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.3332479993502299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.34888001283009845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.32691200574239093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.3619680007298787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.35525866349538165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.36024534702301025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.338703989982605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.3537013530731201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.3306399981180827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.3686079978942871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.36132268110911053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.3657279809315999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.3656586805979411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.3593279918034871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.3593279918034871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.36129601796468097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.35341866811116535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.35608001550038654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.36090131600697833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.35096001625061035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.35652267932891846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.19555733601252237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.1930613319079081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.19426665703455606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.1986666719118754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.19025067488352457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.19499200582504272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.18733332554499307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.18394132455190024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.187008003393809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.17542399962743124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.18306134144465128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.17300266027450562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.19102933009465536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.1877066691716512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.18923733631769815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.17753599087397257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.18722132841746011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.1757226586341858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.19352000951766968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.18980266650517783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.19242666165033975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.18931732575098673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.18968000014623007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.18622400363286337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.1914506753285726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.1871359944343567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.1880106727282206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.18805867433547974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.18497065703074136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.185205340385437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.10961066683133443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.10756267110506694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.1074773371219635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.10557333628336589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.10909866293271382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.10140800476074219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.09920533498128255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.09404800335566203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.09735999504725139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.09199466307957967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.10217066605885823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.10031466682751973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.10145066181818645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.09714667002360027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.09326400359471639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.10514133175214131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.10292800267537434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.10261332988739014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.10043733318646748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.10105599959691365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.09988266229629517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.10114666819572449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.10282133022944133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.10065600275993347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.10155733426411946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.06217066446940104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.0636053333679835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.058431997895240784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.05779199798901876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.05805333455403646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.06035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.05527466535568237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.06044266621271769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.05798399945100149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.03654933224121729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.03931200007597605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.03921599934498469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.03885866701602936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.03934400031963984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.0245919997493426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.025765334566434223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.026901334524154663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.3773706754048665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.3747680187225342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.3736319939295451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.3646986484527588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.37270931402842206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.3640586535135905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.37965865929921466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.3792320092519124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.37758398056030273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.3553546667098999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.37747732798258465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.35492265224456787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.3877973159154256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.38964800039927167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.38369067509969074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.3856106599171956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.3831413189570109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.38466131687164307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.3792906602223714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.38156267007191974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.3746933142344157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.3880053361256917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.3741226593653361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.3858666817347209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.20345600446065268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.20336532592773438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.19969600439071655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.20505066712697348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.20089600483576456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.20651199420293173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.19381332397460938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.1937119960784912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.19378133614857992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.1880106727282206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.19388800859451294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.18833067019780478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.1967573364575704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.19729600350062051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.1954666574796041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.1835306684176127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.19635732968648276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.18274666865666708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.20365333557128906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.20223466555277506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.1997493306795756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.19755733013153076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.20004800955454508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.19762667020161948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.19770665963490805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.19758933782577515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.19450666507085165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.1957333286603292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.1956640084584554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.19820799430211386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.11223999659220378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.1123413344224294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.11000532905260722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.11347200473149617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.11158399780591328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.11386133233706157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.10551466544469197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.10553066929181416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.10466133554776509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.09934932986895244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.10506666700045268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.10006933410962422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.1074079970518748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.10713600118954976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.0995146632194519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.10557867089907329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.09894933303197224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.10925867160161336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.10970133543014526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.10704533259073894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.10748799641927083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.10538666447003682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.10749866565068562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.10756799578666687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.10540266831715901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.10731200377146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.10590933760007222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.1060640017191569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.061103999614715576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.062368000547091164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.05831466615200043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.058431997895240784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.05996799965699514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.05625066657861074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.06039466460545858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.05755733450253805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.05972266693909963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.06010666489601135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.06012799839178721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.04049066702524821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.040720000863075256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.04010133445262909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.038149334490299225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.04002666721741358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.038149334490299225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.040922666589419045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.04125333329041799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.04131199916203817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.2688586711883545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.26927467187245685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.26731733481089276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.25632532437642414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.26522133747736615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.25489600499471027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.2694080074628194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.26930665969848633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.26705066363016766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.249290664990743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.2661813298861186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.2490453322728475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.273525337378184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.2730453411738078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.2708959976832072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.26260266701380414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.2699199914932251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.2614346742630005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.26845333973566693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.2682773272196452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.26974932352701825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.26123199860254925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.2682560086250305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.2606559991836548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.1481760044892629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.14818666378657022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.14653333028157553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.14594667156537375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.14602133631706238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.14479466279347739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.13993600010871887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.1399626632531484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.13991999626159668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.13200533390045166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.1400320033232371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.1318186620871226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.14009599884351095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.1404213309288025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.14034666617711386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.1325386663277944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.13949867089589438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.1313706636428833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.14390933513641357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.14220800002415976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.14223466316858926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.1384106675783793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.14290133118629456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.13818666338920593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.14050666491190592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.1423360009988149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.14205867052078247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.1377066671848297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.14219733079274496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.1400266687075297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.08130133152008057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.0796319991350174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.08108266691366832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.08103999992211659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.07695466776688893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.0766133318344752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.07274133463700612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.07689600189526875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.07373333474000295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.07886933286984761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.07260799904664357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.07892799874146779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.07890133559703827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.0775679995616277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.07486933469772339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.0786186655362447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.07554133236408234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.07691200077533722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.07502933343251546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.04696000119050344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.04773333172003428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.047466665506362915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.047498668233553566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.04714666803677877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.04402133325735728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.0465280016263326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.04658666749795278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.0481333335240682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.03068800022204717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.031002665559450786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.03183466692765554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.031871999303499855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.022170667846997578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.21428799629211426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.21387199560801187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.2121386726697286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.19666133324305216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.21182932456334433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.19706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.2138026754061381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.21386132637659708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.2128480076789856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.1981546680132548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.21299733718236288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.19815999269485474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.21529066562652588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.21530665953954062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.21569599707921347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.2032960057258606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.2140000065167745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.20233599344889322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.2141973376274109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.213919997215271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.21549866596857706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.20580265919367471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.21541333198547363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.20571200052897134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.11541866262753804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.11543466647466023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.11504532893498738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.11199999849001567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.11540800333023071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.1126026709874471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.11160533626874287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.11188266674677531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.11213866869608562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.10531199971834819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.11185066898663838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.11188266674677531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.11297067006429036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.11369066437085469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.1136799951394399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.1132533351580302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.107205331325531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.11379733681678772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.10796266794204712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.11183999975522359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.11192533373832703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.11175466577212016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.10735999544461568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.11320533355077107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.10883733630180359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.06654933094978333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.0661599983771642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.06584533552328746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.06407999992370605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.06588799754778545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.06587733328342438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.06595733265082042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.06445333361625671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.06127466758092245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.06619200110435486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.06618666648864746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.06649066507816315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.06468266745408376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.06181866427262624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.06424533327420552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.06651733318964641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.0664160003264745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.06481066842873891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.06260266900062561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.06485866506894429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.06346133351325989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.03800000001986822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.0403413325548172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.0391893337170283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.040149333576361336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.04011733333269755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.03990933299064636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.1832053263982137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.18318933248519897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.18331732352574667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.17179199059804282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.18081067005793253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.17084799210230509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.18498667081197104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.18330132961273193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.18291733662287393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.17091200749079385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.18291199207305908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.17062399784723917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.1851200064023336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.1853440006573995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.18509334325790405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.1750026742617289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.18349866072336832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.17339734236399332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.1832586725552877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.18324265877405801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.18416533867518106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.1750613252321879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.18316799402236938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.17494932810465494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.10124799609184265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.10133333007494609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.09848533074061076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.10155199964841206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.09734933574994405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.09939733147621155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.0997759997844696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.10130666693051656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.09481599926948547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.10128532846768697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.095360000928243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.09946133693059285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.0995840032895406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.10085866848627727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.0995093286037445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.09342400232950847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.10121599833170573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.10102400183677673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.10108799735705058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.09599467118581136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.10142933328946431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.09622933467229207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.10117333134015401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.10044266780217488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.10105066498120625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.09523733456929524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.10088533163070679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.09678933024406433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.05801066756248474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.056015998125076294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.05637866755326589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.0543146679798762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.05429333448410034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.05783999959627787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.05806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.05797866483529409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.0613919993241628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.058389330903689064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.03608000030120214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.03602666656176249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.024533333877722423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.026261332134405773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,0.8493279616038004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,1.36627197265625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,1.3766454060872395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,1.243791977564494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,7.986410776774089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,8.002885182698568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,1.3857866923014324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,7.3668638865153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,1.3949707349141438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,1.266650676727295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,8.017029444376627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,8.019013086954752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,1.3942987124125164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,7.395615895589192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,1.4198773701985676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,1.2821386655171711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,8.024805068969727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,8.046192169189453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,1.4327200253804524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,7.406229019165039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,1.446656068166097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,1.3252373536427815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,8.0829226175944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,8.090719858805338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,0.8083146413167318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,7.455743789672852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,0.8270133336385092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,0.7683520317077637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,4.1973066329956055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,3.879701296488444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,4.219930648803711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,0.7203359603881836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,0.7263733545939127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,0.6584373315175375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,4.085728009541829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,4.089125315348308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,3.770496050516764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,0.7250773111979166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,0.7310187021891276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,0.6645866632461548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,4.09062385559082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,4.095877329508464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,0.7316319942474365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,3.778832117716471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,0.7403146425882975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,0.6727200349171957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,4.097951889038086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,4.107130686442058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,0.7456586360931396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,3.7834078470865884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,0.7568639914194742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,0.6935199896494547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,4.122591972351074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,4.130304018656413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.45014933745066327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,3.806965192159017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.4617439905802409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.4326293468475342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,2.1940266291300454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,2.2070186932881675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,2.034224033355713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.40651198228200275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.4094293514887492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,2.1391894022623696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.37693333625793457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,2.142255942026774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,1.9801759719848633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.41018664836883545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.41319998105367023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.38195733229319256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,2.141717274983724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,2.146597385406494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,1.98199462890625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.4140426715215047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.42051732540130615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.38577600320180255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,2.146735986073812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,2.1512266794840493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,1.9876319567362468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.42186133066813153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.42764798800150555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.396229346593221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,2.156378746032715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,1.9970879554748535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,2.162293275197347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.3110239903132121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.3108479976654053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,1.2341972986857097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.28972800572713214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.2353280385335286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,1.1391466458638508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.3102239966392517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.31038933992385864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.2250133355458577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.2913813392321269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,1.2259573141733806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,1.139845371246338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.3100213408470154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.30906132857004803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.2263200283050537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.2893226742744446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.22762664159139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,1.1370293299357097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.31017067035039264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.31138134002685547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.226197322209676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.29073599974314374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.2270507017771404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,1.1400319735209148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.31083200375239056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.31003199021021527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.2283679644266765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.28962133328119916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.2290666898091633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,1.1393493016560872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,1.0316906770070393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,1.0326826572418213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,0.9291093349456787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,4.732672055562337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,4.743818600972493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,4.3615414301554365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,1.0309333006540935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,1.0447466373443604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,0.9504319826761881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,4.742047945658366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,4.752773284912109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,4.373973210652669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,1.0393866697947185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,1.0515306790669758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,0.9549173514048258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,4.754293441772461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,4.768197377522786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,4.387925465901692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,1.0646506945292156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,1.0779200394948323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,0.9869226614634196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,4.788037300109863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,4.804106712341309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.6106826861699423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,4.421488126118978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.6251093149185181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,0.5797653198242188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,2.5224266052246094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,2.5368800163269043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,2.3339573542277017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.5437813202540079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.5487360159556071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,0.49994667371114093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,2.4372053146362305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,2.441744009653727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,2.252101262410482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.5480639934539795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.5523573160171509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,0.503653327624003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,2.4444640477498374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,2.445674737294515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,2.2586986223856607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.5530080000559489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.5593013366063436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,0.5106346607208252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,2.45090659459432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,2.456117312113444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,2.265221277872721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.564245343208313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,2.4642186164855957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.5721760193506876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,0.5238453149795532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,2.4731200536092124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.3428479830423991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,2.275621255238851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,1.335653305053711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.3529333273569743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.32841066519419354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,1.3443999290466309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,1.2418453693389893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.30850134293238324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.3104426662127177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,1.291536013285319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.2877333362897237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,1.2946986357371013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,1.1980799833933513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.3102560043334961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.3123040000597636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,1.2956799666086833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.2895306746164958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,1.2990293502807617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,1.2038400173187256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.3142186601956685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.3183093269666036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,1.3013813495635986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.2958186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,1.3028586705525715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,1.20414400100708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.32038400570551556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.32449599107106525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,1.3086826801300049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.30107732613881427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,1.312986691792806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,1.2143413225809734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.23585599660873413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.23675199349721274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,0.7745280265808105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.2220053275426229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,0.7742986679077148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,0.7150506973266602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.232314666112264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.23411200443903604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,0.766586701075236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.21940267086029053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,0.7666506767272949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,0.711680014928182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.2355039914449056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.2343733310699463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,0.7661013603210449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.22007467349370322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,0.7650079727172852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.23412799835205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,0.7128106753031412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.2345013419787089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,0.7662773132324219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.22002132733662924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,0.7143893241882324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,0.7667840321858724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.23637332518895468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.2364906668663025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.22177600860595703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,0.7709173361460367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,0.7150293191274008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,0.8526346683502197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,3.419722557067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,0.8597386678059896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,0.7747946580251058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,3.426682790120443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,3.1507625579833984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,0.8573866685231527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,3.4291680653889975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,0.867194652557373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,0.7832427024841309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,3.4351253509521484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,3.1603307723999023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,0.8713119824727377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,3.4392213821411133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,0.8771893183390299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,0.7942079703013102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,3.4485438664754233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,3.1706720987955728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,0.8877600034077963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,3.4653278986612954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,0.8977866967519125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,0.8189333279927572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,3.4763946533203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,3.1959145863850913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.511733333269755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,1.8398933410644531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.523199995358785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.4852373202641805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,1.8514933586120605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,1.7039839426676433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.4548480113347371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,1.7701652844746907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.4606399933497111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.4187519947687785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,1.7744587262471516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,1.640384038289388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.4599200089772542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,1.7763892809549968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.46504000822703045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.42626134554545086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,1.7786827087402344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,1.6406453450520833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.46350932121276855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,1.7814985911051433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.4699999888737996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.4291466474533081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,1.7873387336730957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,1.6463093757629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.47276798884073895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,1.7941172917683919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.4803520043690999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.4391839901606242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,1.7995840708414714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.2857866684595744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,1.663434664408366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.29411200682322186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,0.9844213326772054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.27535466353098553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,0.9920213222503662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,0.9148533344268799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.25674132506052655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.2590346733729045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,0.9465973377227783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.24151466290156046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,0.8855306307474772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,0.9497493108113607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.25678932666778564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.2603893280029297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,0.9494933287302653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.243504007657369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,0.9515626430511475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,0.8855040073394775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.2609386642773946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.26418666044871014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,0.954912026723226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.24699199199676514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,0.9560800393422445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,0.8868373235066732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.2674293319384257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.2716906666755676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,0.9607093334197998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.2546079953511556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,0.9640693664550781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,0.8943893114725748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.20174400011698404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.20164799690246582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.5821813344955444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.18980266650517783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.5809866587320963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.5390506585439047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.19850132862726846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.19766932725906372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.574341336886088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.18596800168355307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.5756533145904541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.53656534353892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.20125865936279297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.2006346583366394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.576533317565918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.18760534127553305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.576746662457784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.5357706546783447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.20124266544977823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.1981653372446696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.5748853286107382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.185370663801829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.5761919816335043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.5383466482162476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.1999946633974711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.20009066661198935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.578655997912089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.18889600038528442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.5774826606114706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.5372906525929769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,1.3295146624247234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,1.3388214111328125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,1.2053866386413574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,4.542949358622233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,4.177194595336914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,4.551642735799153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,1.3490079243977864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,1.3836906750996907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,4.5637814203898115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,1.2286400000254314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,4.201072057088216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,4.574938774108887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,1.3592373530069988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,1.3707520167032878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,4.5816958745320635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,1.2452586491902669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,4.217023849487305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,4.592565218607585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,1.398314634958903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,1.4242293039957683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,4.628522555033366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,1.290986696879069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,4.643930753072103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,0.7751359939575195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,4.265103975931804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,2.428213278452555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,0.8077706495920817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,0.7322239875793457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,2.243600050608317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,2.444746653238932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,0.6847253640492758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,0.690778652826945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,2.3166826566060386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,0.6235520044962565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,2.3233493169148765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,2.134789307912191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,0.6900959809621176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,0.6974026362101237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,2.3216212590535483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,0.631493330001831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,2.3296640714009604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,2.140117327372233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,0.6955520311991373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,0.7039519945780436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,2.3318506876627603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,0.6386133432388306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,2.1493919690450034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,2.341872056325277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,0.7119839986165365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,0.7220160166422526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,2.35426664352417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,0.6573386589686075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,2.3646772702534995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,2.16485865910848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.41442131996154785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.4230186541875203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,1.2635306517283122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.39186131954193115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,1.2737759749094646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,1.1734399795532227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.36653868357340497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.369759996732076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,1.2088053226470947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.3389813502629598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,1.213311990102132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,1.1177706718444824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.369920015335083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.37379201253255206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,1.2111199696858723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.34382931391398114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.215178648630778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,1.1219627062479656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.37438400586446124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.37887465953826904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,1.217519998550415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.3469173510869344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,1.2226293087005615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,1.1246986389160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.38280534744262695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.3882453441619873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,1.227002700169881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.3556106487909953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,1.232805331548055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,1.1348106861114502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.23612266778945923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.24131200710932413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,0.6854346593221029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.22645866870880127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,0.6928799947102865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.6456960042317709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.2099413275718689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.21184533834457397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,0.6551946798960367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,0.6562186479568481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.622165322303772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.2105706731478373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.21326400836308798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,0.654368003209432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.1981333295504252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,0.6548373301823934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.6169546842575073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.21240532398223877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.2155946691830953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,0.6606026490529379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.2037066618601481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,0.6636373202006022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.6279626687367758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.21981332699457803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.2225280006726583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,0.6658293406168619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.20774932702382407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,0.6676586469014486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.6274986664454142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.16668800512949625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.16672533750534058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.4166880051294963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.15552533666292825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.41599468390146893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.3841866652170817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.16262400150299072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.41327468554178876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.16484799981117249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.15416533748308817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.4126186768213908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.38394665718078613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.1630293329556783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.4105600118637085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.1627893348534902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.15230400363604227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.41252267360687256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.3826346794764201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.1646666626135508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.41258132457733154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.16405333081881204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.1525973379611969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.411141316095988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.382698655128479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.16482133666674295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.4132106701533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.16459733247756958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.1544266641139984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.41353599230448407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.38388800621032715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,0.9948053359985352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,1.0031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,2.7657972971598306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,0.898634672164917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,2.774778683980306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,2.5325759251912436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,1.0046719710032146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,1.0138239860534668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,2.779616038004557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,0.9145706494649252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,2.7882080078125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,2.554389317830404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,1.0121973355611165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,1.0225813388824463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,2.7911094029744468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,0.9260746637980143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,2.801552136739095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,2.5662453969319663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,1.0379040241241455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,1.0487146377563477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,2.8254772822062173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,0.9584000110626221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,2.834986686706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,2.5987680753072104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.5866026480992635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.5996640125910441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,1.5028907457987468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,0.5528800090154012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,1.5146133104960124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,1.3925706545511882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.5165919860204061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.5217013359069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,1.422533353169759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,0.4734933376312256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,1.4269545873006184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,1.3097600142161052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.521722674369812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.5272800127665201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,1.4271893501281738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,0.47669867674509686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,1.4313546816507976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,1.3143466313680012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.5269066492716471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.5324480136235555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,1.4356160163879395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,0.48346134026845294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,1.4416106541951497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,1.3215786616007488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.5391573508580526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.546618660291036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,1.4500640233357747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,0.49673600991566974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,1.4571146965026855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,1.3345759709676106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.3155733346939087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.32257600625356037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,0.7916426658630371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.29967466990152997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,0.7994773387908936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,0.7377973397572836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.2795093258221944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.28134934107462567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,0.7493333021799723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.26104533672332764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,0.7512959639231364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,0.6954826513926188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.2807626724243164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.2836586634318034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,0.7517759799957275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.26308266321818036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,0.7547946770985922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,0.6980000336964926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.28602667649586994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.28859732548395794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,0.7569546699523926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.26528533299763996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,0.7595946788787842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,0.7026666800181071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.29174933830897015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,0.7654293378194174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.2959253390630086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.2716853419939677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,0.7706027030944824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,0.7093813419342041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.17891200383504233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.43783998489379883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.18404799699783325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.17473600308100382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.44314666589101154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.41576000054677326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.15718932946523032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.4147626558939616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.15892266233762106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.14921599626541138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.4147040049235026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.3838026523590088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.1586666703224182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.1604426701863607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.4143466552098592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.15064000089963278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.41486934820810956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.3864693244298299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.1604586640993754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.16485866904258728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.41674665609995526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.15461867054303488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.417738676071167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.3913280169169108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.16300800442695618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.16480533281962076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.42320001125335693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.16012266278266907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.4248480002085368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.40437865257263184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.12416000167528789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.1253546675046285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.2777013381322225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.11886933445930481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.2733760078748067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.25496000051498413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.12364799777666728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.12357333302497864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.2721760074297587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.11764267086982727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.27210666735967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.25459200143814087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.12350400288899739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.12384532888730367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.2730666597684224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.11711999773979187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.2728959918022156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.2547786633173625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.12359999616940816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.12354133526484172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.27379733324050903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.11570133765538533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.2727733254432678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.25330666700998944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.1236853301525116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.27298132578531903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.11755200227101643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.2731146613756816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.2553760011990865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,1.3101600011189778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,1.3188532988230388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,2.7958507537841797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,1.1863893667856853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,2.802687962849935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,2.5611146291097007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,1.3321279684702556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,1.3422452608744304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,2.817514737447103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,1.2119999726613362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,2.828415870666504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,2.5850613911946616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,1.3426720301310222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,2.830688158671061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,1.3540639877319336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,1.2262187004089355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,2.8426666259765625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,2.5985066095987954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,1.3834667205810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,1.3947359720865886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,2.877584139506022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,1.272058645884196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,2.8881813685099282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,2.6463467280069985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,0.7584319909413656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,0.7734986941019694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,1.530186653137207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,0.7147146860758463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,1.5437280337015789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,1.4146080017089844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,0.6651466687520345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,0.672544002532959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,1.4186719258626301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,0.6039359966913859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,1.4264480272928874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,1.3035146395365398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,0.6712266604105631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,1.424741268157959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,0.6793333689371744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,0.6117920080820719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,1.4303359985351562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,1.3091946442921956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,0.6787892977396647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,1.4351520538330078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,0.6987786293029785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,0.620031992594401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,1.4381333986918132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,1.317519982655843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,0.6953972975413004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,0.7043573061625162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,1.4534932772318523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,0.6399893363316854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,1.4633599917093914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,1.3381813367207844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.39645334084828693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.4062826633453369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,0.7922613620758057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.3755626678466797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,0.8005173206329346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,0.7364799976348877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.3489226500193278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.35253334045410156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,0.7371679941813151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.3226933280626933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,0.7414720058441162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,0.6833919684092203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.3526666561762492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.35485867659250897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,0.7415359814961752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.3243306676546733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,0.7443786462148031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,0.6853546301523844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.3579893509546916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,0.7455093065897623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.3617759943008423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.3299520015716553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,0.7515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,0.6903359889984131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.36582398414611816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.369818647702535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,0.756330649058024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.33852799733479816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,0.7625386714935303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,0.6991999944051107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.21567465861638388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.22096532583236694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.42510398228963214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.20680532852808634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.43197333812713623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.401962677637736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.18738667170206705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.19024533033370972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.17712533473968506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.39418665568033856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.3675946791966756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.18944533665974936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.39530134201049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.19042134284973145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.17870400349299112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.3972959915796916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.36851731936136883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.1935946742693583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.40760000546773273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.1996906598409017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.18308266003926596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.40120001633961994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.3734133243560791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.1995733380317688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.40611199537913006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.20218666394551596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.18921067317326865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.4084106683731079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.3779360055923462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.12609066565831503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.24177600940068564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.12998933593432108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.12478933731714885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.24445867538452148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.23255999883015951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.1133013367652893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.22679466009140015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.11339733004570007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.1034453312555949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.22772266467412314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.210207998752594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.22750933965047201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.11378666758537292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.10445333520571391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.22671467065811157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.21009600162506104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.2279520034790039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.11546666423479716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.10588799913724263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.22921067476272583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.21389333407084146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.11580800016721089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.23030400276184082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.11782399813334148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.11101866761843364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.233407994111379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.2169439991315206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.16018666823705038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.08927466471989949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.08501866459846497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.1607306698958079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.15060266852378845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.0885706643263499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.16038399934768677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.08919466535250346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.08463467160860698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.16035733620325723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.14863466223080954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.08912533521652222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.1606666644414266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.15997866789499918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.1481760044892629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.08922132849693298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.1606986622015635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.08987200260162354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.08474666873613994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.15995200475056967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.1485973298549652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.08916800220807393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.1605280041694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.09008000294367473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.1611840029557546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.14986667037010193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,0.9814506371816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,1.7693920135498047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,0.9901119867960612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,0.8878826300303141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,1.7770613034566243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,1.6204106012980144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,1.011349360148112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,1.7842772801717122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,1.0021653175354004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,0.9039146900177002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,1.7925279935201008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,1.6368853251139324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,1.0017600059509277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,1.7956053415934246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,1.0098880132039387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,0.9135093688964844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,1.8027413686116536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,1.6468106905619304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,1.0274240175882976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,1.8220267295837402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,1.0370773474375408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,0.9458613395690918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,1.832357406616211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,1.679093360900879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.5724373261133829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,0.9851733048756918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.5836746692657471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,0.5388213396072388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,0.9962720076243082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,0.9160373210906982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.5037759939829508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,0.9083253542582194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.5065546830495199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,0.458954652150472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,0.9123520056406657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,0.8349546591440836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.508458654085795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,0.9124800364176432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.5131093263626099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,0.4644266764322917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,0.9175733725229899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,0.8405973116556803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.5127786795298258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,0.9179573059082031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.5202399889628092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,0.4704853296279907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,0.9250400066375732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,0.8462133407592773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.5251733462015787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,0.9311412970225016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.532810648282369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,0.4846666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,0.9375680287679037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,0.8589066664377848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.3019253412882487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.5162880023320516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.30897066990534466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.2877279917399089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.5231680075327555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.484005331993103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.2640586694081624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.47313066323598224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.26684266328811646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.2456000049908956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.47701867421468097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.44234665234883624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.265610675017039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.4758453369140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.26948267221450806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.2491040031115214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.47970132033030194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.44441068172454834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.2696746587753296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.4822346766789754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.2735146681467692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.2526773413022359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.48626665274302167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.448800007502238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.2784480055173238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.4888213475545247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.2804800073305766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.2590293288230896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.4946399927139282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.4554506540298462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.1649440030256907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.28757333755493164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.16873600085576376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.15990933775901794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.28624532620112103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.2671999931335449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.14301333824793497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.25709333022435504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.1439839998881022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.13212266564369202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.2584693431854248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.2387040058771769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.14377599954605103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.25805334250132245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.14437333742777506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.1341546674569448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.2600640058517456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.24267200628916422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.14541866381963095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.2605920036633809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.14618133505185446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.13818132877349854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.2616533239682515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.24522666136423746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.1504586637020111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.26500266790390015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.15245866775512695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.1443839967250824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.26733332872390747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.25123733282089233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.09295466542243958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.1625706652800242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.09517866373062134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.16502933700879416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.15685333808263144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.15324800213178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.08494933446248372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.15465066830317178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.14363732933998108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.08548266688982646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.154831995566686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.07909333209196727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.15502400199572244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.14408000310262045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.08498133222262065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.15453333655993143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.08054933448632558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.15601600209871927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.14436266819636026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.08821333448092143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.1567359964052836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.0888853371143341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.08282666901747386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.15659200151761374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.14749333262443542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.06850666801134746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.11577600240707397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.06858133276303609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.11596266428629558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.10845333337783813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.06836799780527751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.11563733220100403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.06445333361625671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.11494933565457661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.06855466465155284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.11531200011571248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.06866666674613953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.06658133367697398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.11495467027028401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.10790933171908061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.06942933301130931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.11632532874743144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.06849599877993266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.06467733283837636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.11427199840545654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.06870399912198384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.11567999919255574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.06878933310508728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.06653333206971486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.11532800396283467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.10749866565068562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,1.3255679607391357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,1.935754617055257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,1.3281066417694092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,1.1862026850382488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,1.9395306905110676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,1.759621302286784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,1.3604747454325359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,1.9620960553487141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,1.3540053367614746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,1.2093760172526042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,1.9653760592142742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,1.7753547032674153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,1.375930627187093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,1.9905385971069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,1.3706879615783691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,1.2168426513671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,1.9822719891866047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,1.786512056986491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,1.4059200286865234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,2.0199732780456543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,1.4030720392862956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,1.2612906297047932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,2.0198240280151367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,1.8346239725748699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,0.7589706579844157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,1.0799466768900554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,0.7673280239105225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,0.7100799878438314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,1.0877333482106526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,1.0043093363444011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,0.6597013473510742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,0.9717706839243571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.6646506786346436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,0.5971946716308594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,0.9767200152079264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,0.8889973163604736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,0.6681866645812988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,0.9805333614349365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,0.6744106610616049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,0.6075093348821005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,0.9856213728586832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,0.8985599676767985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,0.6746400197347006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,0.9863626956939697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,0.6800746917724609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,0.614250659942627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,0.9942773183186849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,0.9061066309611002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,0.6927733421325684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,1.0071252981821697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,0.6982560157775879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,0.6345599889755249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,1.0129280090332031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,0.9264266490936279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.38942933082580566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.5556266705195109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.3965546687444051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.36713067690531415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.5638826688130697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.5193813244501749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.3399946689605713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.5011359850565592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.3433599869410197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.31253333886464435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.5045760075251261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.4636853138605754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.34305067857106525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.5041600068410238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.3473066488901774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.31868799527486164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.5081813335418701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.4679679870605469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.34942932923634845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.5113279819488525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.35347731908162433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.32130134105682373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.5244693358739217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.47204267978668213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.3577440182367961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.5194773276646932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.362005352973938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.329802672068278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.5245333512624105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.4809066851933797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.20769067605336508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.29526400566101074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.21392534176508585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.1995733380317688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.30080533027648926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.27988266944885254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.17755200465520224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.26282666126887005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.17990932861963907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.16799465815226236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.265994668006897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.24875199794769287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.1814026633898417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.2657279968261719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.1819093426068624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.16878400246302286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.2675199906031291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.2505706747372945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.18327999114990234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.26817599932352704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.18545067310333252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.17283199230829874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.27161600192387897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.25491199890772503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.19118932882944742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.27643199761708576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.19211200873057047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.17919466892878214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.27794132630030316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.259168008963267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.11620266238848369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.16542933384577432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.11755200227101643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.11264533797899882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.16824533541997275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.1588106652100881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.10213333368301392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.1502346694469452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.10290132959683736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.09287466605504353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.15121600031852722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.13803733388582864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.10338667035102844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.15101866920789084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.09505066275596619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.1516800026098887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.1381386617819468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.10446400443712871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.151936004559199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.103493332862854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.09522133072217305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.15223999818166098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.14083199699719748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.10599999626477559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.15341867009798685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.10748799641927083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.10037333766619365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.15576000014940897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.14453333616256714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.06793599824110667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.09921600421269734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.10139733552932739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.09505066275596619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.09525332848230998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.06481599807739258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.09503466884295146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.08771199981371562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.06410133341948192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.09501333038012187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.09683199723561604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.08902933200200398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.09523199995358785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.0664106657107671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.06149866680304209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.09646399815877278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.08879466851552327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.09672533472379048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.06653333206971486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.09732799728711446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.07473599910736084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.0705813318490982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.05416533350944519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.052202666799227394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.07645333309968312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.07122133175532024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.05472533404827118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.07502399881680806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.052239999175071716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.07533333202203114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.06939733525117238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.07477866609891255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.07486933469772339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.06958400209744771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.05750933289527893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.07540266712506612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.07482666770617168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.07029866675535838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,0.9734133084615072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.274234692255656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,0.9816693464914957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,0.8802719910939535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.2807679971059163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.1622719764709473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,0.9897066752115885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.2906239827473958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,0.9969600041707357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,0.908357302347819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.2952693303426106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.175498644510905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,1.0045920213063557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.3038079738616943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,1.0075093110402424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,0.9047253131866455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.3100533485412598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.2024959723154705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,1.023093303044637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,1.323621352513631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,1.029253323872884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,0.9393440087636312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.3332266807556152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.2153386274973552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.567360003789266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,0.728869358698527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.5788640181223551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,0.5419893264770508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,0.7391306559244791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,0.6831839879353842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.5044373273849487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.6478613217671713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.4976160128911336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.45081067085266113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.6550186475118002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.5965706507364908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.502784013748169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.6567946672439575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.5068000157674154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,0.45814398924509686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.6625279982884725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.6042506694793701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.516485333442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.6607466538747152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.5108266671498617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,0.46399998664855957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.6680213610331217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.6090453465779623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.5193973382314047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,0.6754720211029053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.5349173148473104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,0.4756853183110555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,0.6800426642100016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.620474656422933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.29596267143885296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.38339734077453613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.30422399441401166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.28008532524108887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.3899253209431966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.3576853275299072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.25645333528518677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.33604268232981366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.25778132677078247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.2376533349355062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.33827733993530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.31329600016276044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.25753066937128705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.3392639954884847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.261407991250356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.24590933322906494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.3422559897104899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.3167146643002828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.26335465908050537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.3439679940541585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.2650773326555888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.2449493408203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.34693864981333417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.32177066802978516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.27110934257507324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.35983999570210773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.2781546711921692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.25251199801762897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.35628799597422284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.3274186650911967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.15891733765602112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.20517865816752115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.163674662510554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.15461333592732748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.20890667041142783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.19406400124231973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.1349493364493052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.18039999405543009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.13758933544158936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.12730666995048523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.1811359922091166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.16714133818944296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.13538133104642233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.180351992448171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.1381066640218099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.18392000595728555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.16898133357365927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.13667733470598856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.18141865730285645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.1405226687590281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.13194666306177774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.1841920018196106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.1730133295059204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.14190399646759033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.18641066551208496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.1455733378728231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.13822933038075766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.19005866845448813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.1792746583620707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.08736000458399455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.114464004834493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.09105599919954936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.08701866865158081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.11667199929555257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.11149332920710246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.07851733267307281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.10538666447003682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.07217599948247273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.09724799791971843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.0788320004940033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.07993599772453308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.07326933244864146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.10603200395901997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.08062399923801422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.07506133119265239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.10631466905275981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.09880533814430237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.08074666559696198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.10730133454004924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.08271466692288716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.07633066674073537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.10812800129254659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.10034666458765666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.05233600238958994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.05449066559473673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.04962133367856344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.0701333334048589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.06981866558392842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.07060266534487407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.06621333460013072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.05030400057633718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.07039466500282288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.05051200091838837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.07100266714890797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.06650666892528534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.050570666790008545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.07047466437021892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.06770133475462596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.04190933207670847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.05230933427810669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.052853330969810486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.042447999119758606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.05055999755859375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.042170668641726174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.05355200171470642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.0503359983364741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.052933335304260254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.049498667319615684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.1551360289255779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,1.344330628712972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.1560479799906414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,1.0622613430023193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,1.345578670501709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.246549367904663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.1691146691640217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,1.357258637746175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.162943998972575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,1.1146346728007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,1.3546667098999023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.3015466531117756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.1992053190867107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,1.3636800448099773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.1698986689249675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,1.1453813711802165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,1.361680030822754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,1.32970134417216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.1974133650461833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,1.3876479466756184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.1880106925964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,1.1666293144226074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,1.3851946194966633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,1.3533280690511067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,0.6392266750335693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,0.7458399931589762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,0.6277013222376505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,0.629472017288208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,0.7326773007710775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,0.716543992360433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.5881919860839844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,0.6990026632944742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.586085319519043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,0.5407199859619141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,0.7008907000223795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.6331786712010702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.5943253437678019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,0.6923627058664957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.5932426850001017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,0.5662453174591064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,0.6896426677703857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.6469653447469076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.6101280053456625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,0.6943093140920004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.5983680089314779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,0.5726346572240194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,0.692517360051473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,0.653600017229716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.6181653340657552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,0.702245314915975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.6047733227411906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,0.5788266658782959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,0.7026240030924479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,0.6762932936350504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.3319360017776489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.38811198870340985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.3266613284746806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.3245866696039836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.3884426752726237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.369210680325826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.3049866755803426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.35571734110514325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.30642666419347125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.2808159987131755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.35654934247334796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.33504533767700195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.304911990960439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.3566506703694661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.30534400542577106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.28591465950012207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.35660266876220703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.33501867453257245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.31007466713587445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.3609706560770671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.3081653316815694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.2895786762237549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.359877347946167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.3404426574707031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.31384533643722534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.36609065532684326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.3122719923655192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.29765866200129193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.3651786645253499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.3482400178909302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.17720532417297363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.20589333772659302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.17375467220942178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.17076265811920166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.2030506730079651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.19684267044067383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.16100800037384033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.18873600165049234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.16054399808247885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.14939199884732565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.18811200062433878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.1760480006535848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.1621226668357849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.18913066387176514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.1622933348019918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.152319997549057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.18845866123835245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.17868266503016153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.16454399625460306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.191594660282135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.1644053359826406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.155130664507548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.19013333320617676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.18107734123865762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.16659733653068542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.19403199354807535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.1648373305797577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.15997333327929178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.19208000103632608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.185263991355896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.09899733463923137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.11539199948310852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.09809600313504536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.09820800026257832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.11411733428637187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.11256532867749532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.09005866448084514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.10613866647084554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.09105599919954936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.08306666711966197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.10578133662541707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.0988106628259023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.09013866384824117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.10782933235168457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.09087999661763509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.08354133367538452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.10629333058993022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.09829333424568176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.09009066224098206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.10664533575375874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.08524800340334575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.10124799609184265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.09090133508046468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.10625599821408589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.0909493366877238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.08749333024024963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.1074079970518748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.1020960013071696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.056330665946006775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.06739733119805653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.05730666716893514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.0569653312365214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.06681600213050842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.060826669136683144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.05209066470464071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.06513066589832306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.06505600114663442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.05518933137257894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.06101333101590475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.06413866579532623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.0652106652657191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.06108266611893972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.03839466720819473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.04605866471926371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.04318933188915253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.036330667634805046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.04234133164087931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.03654933224121729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.03659199923276901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.04426133135954539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.03812800099452337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.036490666369597115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.03793066740036011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.1185333728790283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.1283679803212483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.1174026330312092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,1.0352746645609539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.1273813247680664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,1.0466026465098064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.1331199804941814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.1439146995544434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.1290720303853352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,1.0832959810892742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.1375733216603596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,1.1025493144989014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.140992005666097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.1513386567433674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.137290636698405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,1.115557352701823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.1477866967519124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.1346240043640137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.1618879636128743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.1701013247172039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.1667040189107258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,1.1383840243021648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.1749866803487141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.1481760342915852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.6209226846694946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.631168007850647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.6116693417231241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,0.605189323425293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.6194613377253214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.6114293336868286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.5697066783905029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.5744693279266357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.5696800152460734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,0.525551994641622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.5720586776733398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.5310879945755005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.5762240091959635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,0.7707413037618002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.5818560123443604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.5755253235499064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,0.5406719843546549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.5808480183283488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.5460533301035563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.5816853443781534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.5859893163045248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.5798879861831665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,0.5464586814244589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.585807998975118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.5533653497695923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.5897226730982462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.5944000085194906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.5862880150477091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,0.5693866809209188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.5922880172729492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.5732426643371582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.32425065835316974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.33035733302434284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.31813865900039673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.31329067548116046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.3240373333295186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.3195093274116516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.2965973416964213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.2993333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.2739466627438863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.29628799359003705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.2982773383458455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.2769813338915507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.29691733916600543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.2990666627883911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.2985440095265706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.2786613305409749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.3007733424504598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.28249067068099976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.30186667044957477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.3004693388938904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.28489599625269574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.30481066306432086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.2871680061022441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.3083466688791911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.3094506661097209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.30505067110061646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.29149866104125977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.3086186647415161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.29840532938639325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.172650674978892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.17601599295934042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.1693440079689026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.16718933979670206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.17307732502619425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.1711626648902893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.15761599938074747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.15836800138155618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.157669335603714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.14722133676211038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.15879467129707336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.14961600303649902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.15854400396347046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.15987199544906616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.15888532996177673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.14986667037010193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.1583466629187266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.15154133240381876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.15904000401496887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.16275200247764587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.15953600406646729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.15228266517321268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.16100266575813293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.15480533242225647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.16261333227157593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.1646986703077952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.16221333543459573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.15643200278282166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.16453333695729574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.15866133570671082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.0965119997660319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.09845866759618123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.09590400258700053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.09668266773223877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.09762666622797649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.09731733798980713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.08141333361466725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.08913066983222961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.08206933240095775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.08898666501045227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.09045333663622539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.08914666374524434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.08244266609350841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.08995733658472697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.08220799763997395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.08898133039474487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.08904533584912618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.0890826682249705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.08353599905967712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.09014399846394856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.08397866288820903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.08949866890907288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.090938667456309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.0911253293355306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.09090666969617207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.08528000116348267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.05665066838264465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.05632533133029938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.057946667075157166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.050981332858403526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.0557226687669754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.05054399867852529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.05123733480771383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.051551997661590576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.054245332876841225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.0562666654586792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.05434666574001312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.05604266623655955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.055685331424077354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.05034666756788889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.055173332492510475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.03892799963553747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.03676799933115641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.036714665591716766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.03500800083080927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.03645866612593333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.0386613334218661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.03692800054947535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.03879466652870178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.028778667251269024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.02784000088771184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.028309332827727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.027978666126728058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.023973333338896435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.023765332996845245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.528714656829834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.5176959832509359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.5253599882125854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,0.4814026753107707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.5176533460617065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.4737653334935506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.5323573350906372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.5249280134836832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.530293345451355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,0.4968533515930176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.5205706755320231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.4877920150756836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.5376533269882202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.5266986687978109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.5341013272603353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,0.5050826470057169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.5254079898198446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.49562132358551025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.5433653195699056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,1.5005119641621907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.5348960161209106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.5416959921518961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,0.528874675432841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.5327839851379395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.5196533203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.30316799879074097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.2958186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.29568533102671307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.2881173292795817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.2918986678123474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.28387733300526935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.27660266558329266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.2688000003496806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.24773865938186646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.2759893337885539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.26794666051864624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.24516266584396362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.2749706705411275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.27029865980148315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.27292267481486004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.2576693296432495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.2690719962120056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.2526719967524211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.276799996693929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.2732693354288737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.27398399511973065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.26132800181706745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.26893866062164307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.2569920023282369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.2817866603533427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.2791680097579956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.27853866418202716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.27082665761311847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.2741706569989522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.26501866181691486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.1605280041694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.15971733132998148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.15634666879971823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.15691199898719788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.1546346644560496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.15339199701944986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.14698666334152222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.1446560025215149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.14611732959747314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.13452266653378805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.14453867077827454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.13220266501108804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.1482186714808146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.14640000462532043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.14684266845385233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.13804800311724344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.14416533708572388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.13587199648221335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.14755200346310934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.14680000146230063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.14696000019709268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.1416853368282318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.145306666692098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.13806399703025818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.1508906682332357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.14755733807881674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.1497066617012024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.14406933387120566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.1463466684023539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.1425279974937439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.09058666229248047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.08938133716583252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.08851200342178345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.0819893330335617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.08344533046086629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.075013334552447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.07520000139872234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.08277333279450734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.08159466584523518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.08321600159009297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.07576533158620198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.08262933293978374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.07746666669845581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.08145600060621898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.08562133709589641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.07817600170771281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.08364799618721008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.08499200145403545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.08311466872692108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.08501866459846497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.08066133161385854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.08384000261624654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.07949333389600118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.05375466744105021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.051776001850763954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.05217066903909048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.051216001311937966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.051856001218159996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.04955733319123586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.04677866895993551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.04927466809749603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.04711466530958811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.051856001218159996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.05082133412361145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.04738133152325948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.03610666592915853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.03700266778469086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.0348693331082662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.2892480095227559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.29073599974314374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.2912320097287496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.27036799987157184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.28946133454640705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.2707306742668152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.29108800490697223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.2911733388900757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.2903733253479004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.28170667092005414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.2899893323580424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.2799999912579854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.2930186589558919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.2935626705487569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.29050666093826294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.28519999980926514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.2926666736602783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.2841386596361796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.2962613304456075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.29660266637802124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.29495465755462646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.29356799523035687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.2956426739692688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.29320534070332843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.1667626698811849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.166512002547582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.16396799683570862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.1669279932975769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.16503999630610147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.16686399777730307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.15467199683189392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.1529973347981771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.15321066975593567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.14437333742777506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.15426133076349893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.1439253290494283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.1543359955151876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.15363732973734537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.15364799896876016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.148117333650589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.14831466476122537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.15637866655985513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.15526400009791055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.15593600273132324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.15034666657447815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.15524267156918845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.15127999583880106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.1583573321501414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.1569813291231791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.15652799606323242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.1567200024922689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.15837333599726358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.15588266650835672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.09317866961161296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.09227733810742696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.09106133381525676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.09409600496292114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.09116266171137492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.09550399581591289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.086517333984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08714666962623596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.08011733492215474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.08523733417193095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.08045333127180736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.08666132887204488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.08599467078844707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.08531733353932698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08562133709589641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.08127466837565105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.08699733018875122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.08595200379689534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.08159466584523518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08707732955614726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.08236800134181976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.08706667025883992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.08493333061536153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.0879200001557668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.08368000388145447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.054234668612480164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.0531626691420873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.05222400029500326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.051498666405677795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.04833066463470459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.0516480008761088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.05239466826121012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.05184000233809153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.048207998275756836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.05320000151793162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.05235733091831207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.0532533327738444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.03409600009520849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.03398400048414866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.03412266572316488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.033813332517941795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.03389866650104523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.03454400102297465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.02645866572856903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.02586666742960612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.20797866582870483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.2071946660677592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.20715733369191489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.19207467635472616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.2066239913304647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.1916373372077942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.20468266805013022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.20784000555674234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.20762133598327637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.194650669892629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.2063360015551249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.19594132900238037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.2090239922205607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.20865599314371744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.20803733666737875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.19723733266194662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.2075200080871582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.19753599166870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.20959466695785522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.20980799198150635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.20971733331680298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.20339200894037882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.21016534169514975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.2016213337580363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.11956266562143962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.12050666411717732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.11979200442632039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.11738666892051697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.11785067121187846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.11769599715868632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.11152533690134685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.11250133315722148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.1118986705938975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.10451733072598775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.11326932907104492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.11200533310572307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.11176533500353496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.11156800389289856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.10546666383743286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.1132586697737376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.10397332906723022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.11366933584213257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.11349333326021831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.11158399780591328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.10539733370145161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.11269866426785786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.10409067074457805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.11357333262761433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.11356266339619954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.11359999577204387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.11455999811490376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.10983999570210774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.06888533135255177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.06673599779605865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.06683200101057689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.06593599915504456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.06735999882221222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.06487466891606648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.06462400158246358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.061530664563179016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.0642080008983612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.06471999982992808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.0666186660528183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.06459733347098033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.06584533552328746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.06459199885527293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.06598400076230367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.06539733211199443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.06619733572006226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.06073066592216492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.04050666590531667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.04062933226426443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.04203199843565623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.02899733434120814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.02789866675933202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.0206986665725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.024127999941507976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.02260799954334895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.395466685295105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.16526933511098227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.1639306644598643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.16484799981117249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.1523360013961792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.16639999548594156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.15263467033704123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.16642666856447855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.16586666305859885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.164901336034139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.15331733226776123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.1630773345629374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.15343999862670898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.16608533263206482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.16635200381278992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.16659733653068542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.15296533703804016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.16613333423932394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.16666133205095926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.16694400707880655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.16683199008305868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.15691733360290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.16498667001724243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.09102933605511983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.1572426656881968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.09282666444778442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.08868799606959026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.09272000193595886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.08889066179593404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.09088533123334248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.08892266949017842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.09093333284060161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.08385066191355388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.08916266759236653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.08375466863314311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.09005866448084514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.09079466263453166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.08974933624267578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.09131733576456706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.09104532996813457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.09044266740481059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.0839413305123647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.09091200431187947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.08463467160860698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.09118933478991191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.09110933542251587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.08488532900810242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.09095999598503113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.086325337489446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.05212800204753876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.05077866713205973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.05068266888459524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.05387733379999796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.05022933085759481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.05347733199596405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.053946668903032936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.053946668903032936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.05187733471393585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.052389333645502724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.0351200004418691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.035887998839219414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.14355199535687765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.14293866356213888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.14364799857139587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.13378666838010153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.14215466380119324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.13331733147303262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.14265066385269165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.1426346699396769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.14215999841690063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.1325546701749166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.14283733566602072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.1337279975414276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.1421333352724711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.14448533455530801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.14258133371671042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.1341973344484965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.1421440045038859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.13378666838010153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.1423466702302297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.14249066511789957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.14404799540837607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.13423466682434082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.14231999715169272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.13411733508110046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.08050666749477386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.08084266881148021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.07698133091131847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.08064533273379008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.07653333246707916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.07965333263079326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.07899199922879536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.07896533111731212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.07684266567230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.0788320004940033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.08066666622956593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.07659733295440674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.07967466612656911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.07920533418655396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.07923200229803722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.07482133309046428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.08070933322111766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.04959466556708018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.04882133503754934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.05012799799442291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.04950400193532308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.04629333317279816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.04808000226815542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.04986133178075155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.024282666544119518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.02385599911212921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,0.8871573607126871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,5.34002685546875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,0.9305546283721924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,0.833306630452474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,5.346799850463867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,4.94648011525472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,0.9364213148752848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,5.355221430460612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,0.9104266961415609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,0.8503733476003011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,5.364538828531901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,4.966533342997233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,0.9208373228708903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,5.377424240112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,0.9297333558400472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,0.8707839647928873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,5.385093053181966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,4.986330668131511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,0.9506613413492838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,5.413616180419922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,0.9617493152618408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,0.9077920118967692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,5.443509419759114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,5.0256961186726885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.5293386777242025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,2.8188212712605796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.5421280066172282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,0.5136693318684896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,2.8328800201416016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,2.622117360432943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.47014399369557697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,2.7488479614257812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.47571198145548504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.4431573152542114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,2.7536694208780923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,2.549269358317057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.473904013633728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,2.7528320948282876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.47977598508199054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.44866132736206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,2.7596747080485025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,2.55731201171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.48236266771952313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,2.764512062072754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.48819200197855633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.4580533504486084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,2.770458539326986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,2.566373348236084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.4968213240305583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,2.7814451853434243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.5055840015411377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,0.47598934173583984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,2.790207862854004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,2.5838185946146646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.29818133513132733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,1.4932799339294434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.3054453333218892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.2916693290074666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,1.5007999738057454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,1.3963039716084797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.26820266246795654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,1.4598827362060547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.26898133754730225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.26525332530339557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,1.45904541015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,1.3573066393534343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.2712480028470357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,1.4609653155008953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.2722986737887065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.2614133358001709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,1.4612746238708496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,1.360655943552653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.27584532896677655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,1.4653120040893555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.27948800722757977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.2649173339207967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,1.4695040384928386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,1.3655840555826824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.2834186752637227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,1.4753012657165527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.2871359984079997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.27348266045252484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,1.479413350423177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,1.3745066324869792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.20863999923070273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,0.8590453465779623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.2097653349240621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.1986560026804606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,0.889135996500651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,0.7961013317108154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.20387732982635498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,0.8512266476949056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.20561599731445312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.19857066869735718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,0.8537653287251791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,0.7935360272725424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.20578666528066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,0.8527519702911377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.20545599857966104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.195850670337677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,0.8543093204498291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,0.794373353322347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.20570133129755655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,0.8526346683502197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.20586667458216348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.19748800992965698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,0.8560907046000162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,0.7940053145090739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.20974934101104736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,0.8568267027537028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.20714133977890015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.1979573369026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,0.8745813369750977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,0.7952480316162109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,0.6915573279062907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,3.165744145711263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,0.6728266874949137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,0.6238346497217814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,3.1715094248453775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,2.935701370239258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,0.6757493019104004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,3.1767307917277017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,0.6821653048197428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,0.6352213223775228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,3.1808319091796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,2.9478346506754556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,0.686959981918335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,3.189397176106771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,0.6942880153656006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,0.6501439809799194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,3.197648048400879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,2.9633334477742515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,0.7091253598531088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,3.216416041056315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,0.7181759675343832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,0.6790026823679606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,3.2262293497721353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,2.9912052154541016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.4013173182805379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,1.6984160741170247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.4126453399658203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.39202133814493817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,1.708463986714681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,1.5831146240234375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.35916801293691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,1.6450026830037434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.3615413506825765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.33925867080688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,1.6469546953837078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,1.5309279759724934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.3610453208287557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,1.6494720776875813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.3665493329366048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.3444426854451497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,1.653813362121582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,1.5368800163269043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.36841599146525067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,1.657423973083496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.3739413420359294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.35291731357574463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,1.658565362294515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,1.5425920486450195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.37841065724690753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,1.670512040456136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.3855146567026774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.3637813329696655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,1.6770666440327961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,1.556565284729004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.2283253272374471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,0.9147893587748209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.2342026631037394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.22484799226125082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,0.9216693242390951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,0.8575466473897299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.20355733235677084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,0.8844373226165771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.20579200983047485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.19790399074554443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,0.8872480392456055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,0.8286666870117188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.20546134312947592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,0.8869919776916504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.20859734217325845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.1989120046297709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,0.8913013140360514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,0.8311039606730143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.21097066005071005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,0.893999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.21337066094080606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.20361600319544473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,0.8961493174235026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,0.8361333211263021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.21784534056981406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,0.9012160301208496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.22139199574788412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.2106026609738668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,0.9065546989440918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,0.8427039782206217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.164000004529953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.545141339302063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.1623360017935435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.1546293298403422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.5439519882202148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.5053439935048422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.16061866283416748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.5610666672388712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.1602720022201538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.5409173170725504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.5036480029424032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.16056000192960104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.539189338684082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.16087999939918518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.1520960032939911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.5576853354771932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.5032159884770712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.16090666254361471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.5408106644948324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.1609599987665812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.15376533071200052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.5457599957784017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.5028213262557983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.16145066420237222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.5412853161493937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.1606666644414266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.15281066298484802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.5607519944508871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.5053386688232422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.555840015411377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,2.2870346705118814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.561957319577535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,0.5210986534754435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,2.2928640047709146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,2.1240426699320474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.5623146692911783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,2.2954986890157065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.5687573353449503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,0.529312014579773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,2.3010239601135254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,2.132597287495931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.5729706684748331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,2.308560053507487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.5800533294677734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,0.5421013434727987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,2.313802719116211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,2.144672075907389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.5903040170669556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,2.3302720387776694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.5998186667760214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,0.5635413328806559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,2.3384159406026206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,2.167840003967285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.33683733145395917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,1.2424639860788982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.3438560167948405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.3266879916191101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,1.2504693667093914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,1.161189317703247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.29811733961105347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,1.1964320341746013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.3000906705856323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.2837280035018921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,1.239360014597575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,1.1152693430582683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.30266666412353516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,1.2017280260721843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.30431467294692993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.287445326646169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,1.203775962193807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,1.12009596824646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.30771199862162274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,1.208085298538208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.31035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.2931893269220988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,1.2106506824493408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,1.1257812976837158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.31626667579015094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,1.2196746667226155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.3221386671066284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.30406399567921955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,1.2232800324757893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,1.1364373366038005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.19342400630315146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,0.6775573094685873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.19785600900650024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.19138665994008383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,0.6819413503011068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.6356213490168253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.1709973414738973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,0.6534773508707682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.1728000044822693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.16485333442687988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,0.6539413531621298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.6094880104064941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.1726613243420919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,0.6529333194096884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.17499200503031412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.16743467251459757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,0.6561386585235596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.6142719984054565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.17594132820765176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,0.6575573285420736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.17919466892878214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.1729066570599874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,0.6582346757253011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.6184639930725098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.18289599816004434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,0.664192001024882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.18708266814549765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.17939732472101846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,0.6684693495432535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.6251999934514364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.1381760040918986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.41074132919311523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.13757866621017456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.13205333550771078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.4125279982884725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.38394665718078613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.13593066732088724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.40806933244069415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.1365546683470408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.12994666894276938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.40907732645670575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.3820426861445109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.1357973317305247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.40803198019663495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.1357866624991099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.12963199615478516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.42131733894348145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.38309868176778156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.1361066699028015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.4089759985605876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.13606933752695718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.12964800000190735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.4240640004475911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.38361068566640216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.13594667116800943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.4085013469060262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.13857600092887878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.1325546701749166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.4117279847462972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.3827306826909383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,0.8963359991709391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,3.0231412251790366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,0.871397336324056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,0.8076426982879639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,3.030069351196289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,2.8038721084594727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,0.8804746468861898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,3.0405279795328775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,0.8871626853942871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,0.8259733517964681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,3.0470825831095376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,2.8226772944132485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,0.897711992263794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,3.0607945124308267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,0.9046826362609863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,0.8459200064341227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.3038133382797241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,3.0689334869384766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,2.843695958455404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,0.9285279909769694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,0.9384746551513672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,3.0983413060506186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,0.8846293290456136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,3.1079254150390625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,2.880005200703939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.5081013441085815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.5188586711883545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,1.6218719482421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,0.49037333329518634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,1.634069283803304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,1.5149332682291667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.44635732968648273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.4508533477783203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,1.5525600115458171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.4288959900538127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,1.5558560689290364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,1.4434240659077961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.4513813257217407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.45736531416575116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,1.5566399892171223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.4255359967549642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,1.5631999969482422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,1.4502879778544109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.459391991297404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.4660053253173828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,1.5684000651041667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.44366931915283203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,1.5737172762552898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,1.4568959871927898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.4742826620737712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.4835520188013713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,1.586319923400879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.45342934131622314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,1.594469388326009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,1.4776533444722493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.2732853293418884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.27954665819803876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,0.8565119902292887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.27318400144577026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,0.8623572985331217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,0.80293869972229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.24034132560094199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.2434933384259542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,0.8171466986338297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.23201600710550943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,0.8380853335062662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,0.7657333215077718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.24451200167338052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.24551467100779215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,0.8211680253346761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.23344000180562338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,0.823637326558431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,0.7691199779510498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.24894932905832926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.2534079949061076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,0.8279733657836914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.2404266595840454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,0.8323679765065511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,0.7745333512624105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.2587520082791646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.26426132520039874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,0.8383999665578207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.2507733305295308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,0.841749350229899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,0.7839039961496989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.15614933768908182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.15914666652679443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.475658655166626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.47816534837086994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.449285348256429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.13821333646774292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.4540586471557617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.13220266501108804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.4621173143386841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.42450666427612305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.13782399892807007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.1383680005868276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.4548693497975667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.13387200236320496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.4556586742401123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.4262719949086507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.14199466506640115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.1437013347943624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.45974934101104736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.13809067010879517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.45980799198150635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.430735985438029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.14652799566586813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.14819733301798502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.46380265553792316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.1442080040772756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.46624000867207843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.4379040002822876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.11360533038775127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.2959360082944234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.11327466368675232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.10940800110499065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.29789332548777264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.27752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.1113759974638621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.2957013249397278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.2950773239135742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.27689067522684735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.11154666543006897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.29525866111119586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.1116373340288798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.1072106659412384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.29546666145324707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.2755626638730367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.11179199814796448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.29449599981307983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.11133866508801778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.107232004404068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.2940853238105774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.27538132667541504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.11160000165303548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.29577600955963135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.10753066341082256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.295909325281779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.2775999903678894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,0.6487199862798055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,0.6549013455708822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,1.8423093159993489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,0.6058293183644613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,1.8469546635945637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,1.7118933995564778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,0.6567360162734985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,0.6680053075154623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,1.853450616200765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,0.618837316830953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,1.8591945966084797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,1.7207040786743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,0.669813315073649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,0.6768906911214193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,1.8650506337483723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,0.6334986686706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,1.875999927520752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,1.7376640637715657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,0.6933866341908773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,0.7018826802571615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,1.8913280169169109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,0.6624533335367838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,1.9016693433125813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,1.7651999791463215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.3860693375269572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.3945653438568115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,1.0072373549143474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.373306671778361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,1.0169813632965088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,0.9435306390126547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.3389013210932414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.3489439884821574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,0.9552853107452393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.32310400406519574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,0.9579626719156901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,0.8889333407084147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.34408533573150635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.34855465094248456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,0.9609599908192953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.32679466406504315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,0.9647040367126465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,0.8960320154825846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.35115734736124676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.3548640012741089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,0.9677226543426514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.3362079858779907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,0.9718613624572754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,0.9035306771596273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.3614879846572876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.36790935198465985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,0.9796000321706136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.34497066338857013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,0.9875679810841879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,0.9143359661102295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.20990399519602457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.5401706695556641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.21556266148885092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.2060906688372294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.5572746594746908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.5077813466389974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.18469866116841635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.18716265757878622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.509002685546875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.1786293387413025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.513263980547587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.47856001059214276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.18725866079330444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.5120319922765096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.18915732701619467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.18044267098108926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.5145386854807535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.48184001445770264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.1913706660270691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.5179200172424316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.19493865966796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.1855093240737915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.5213919878005981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.48795731862386066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.19823465744654337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.5263040065765381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.20182400941848755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.5289493401845297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.496341347694397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.12424000104268391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.1262079974015554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.305893341700236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.12173866232236226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.3084426720937093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.2912213404973348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.11063999931017558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.290608008702596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.11123733719189961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.10387200117111206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.2906399965286255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.10970133543014526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.27162132660547894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.2895253300666809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.10515200098355611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.11023466785748799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.11185600360234578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.2898400028546651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.2730986674626668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.11157866319020589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.1074666678905487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.2924586733182271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.29181333382924396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.2756906747817993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.11586667100588481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.11730666955312093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.11412266890207927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.2957119941711426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.0885653297106425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.2985866665840149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.2815093398094177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.08823999762535095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.08331733445326488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.19852266709009805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.1854026714960734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.08725333213806152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.19735467433929443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.08788266777992249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.20174932479858398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.18602667252222696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.1980746587117513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.08854400118192036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.08468266328175862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.19890666007995605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.18505066633224487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.08699199557304382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.19771732886632284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.08707732955614726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.19923200209935507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.18661866585413614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.08682133754094441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.08855467041333516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.19746132691701254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.08452266454696655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.19758933782577515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.18513067563374838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,0.8562026818593343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,0.8617226282755533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,1.8495680491129558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,0.7971413135528564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,1.8589653968811035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,1.7188587188720703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,0.874288002649943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,0.8793439865112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,1.8695093790690105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,0.8176266352335612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,1.8774399757385254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,1.7383306821187336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,0.9030933380126953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,0.8971947034200033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,1.8892265955607097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,0.835914691289266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,1.895685354868571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,1.7576905886332195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,0.9223039944966634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,0.9294986724853516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,1.924773375193278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,0.8723200162251791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,1.9314667383829753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,1.7963733673095703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.4970346689224243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.5067733526229858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,1.0133333206176758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,0.4814613262812297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,1.0238933563232422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,0.9552480379740397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.4359946648279826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,0.9483253161112467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.4407573143641154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.4087573289871216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,0.9508746465047201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.44230401515960693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,0.8824053605397543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.44671467940012616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,0.9534347057342529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.4164053201675415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,0.9580746491750082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,0.8890453179677328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.44883732000986737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,0.9616479873657227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.45506131649017334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.4254719813664754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,0.9678773085276285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,0.8986240228017172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.46366933981577557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.4794400135676066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,0.9789386590321859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.44103999932607013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,0.9854933420817057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,0.9151306947072347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.26259734233220416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.5314346551895142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.2611466646194458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.5395466486612955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.5019946495691935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.22815465927124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.5069813330968221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.23650133609771729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.21980265776316324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.4989386796951294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.4665173292160034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.23029865821202597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.4984906514485677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.23314666748046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.2229599952697754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.5009760061899821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.4696693420410156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.23869333664576212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.5160640080769857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.2404693365097046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.22846933205922446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.5088746547698975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.47514665126800537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.2448373238245646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.5146506627400717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.25019200642903644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.24017600218454996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.5194666783014933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.4849280118942261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.14410666624704996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.29183467229207355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.14636799693107605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.14193600416183472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.2956639925638835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.28268800179163617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.12662399808565775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.27157866954803467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.11864533027013142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.2735893328984578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.25498666365941364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.12416533629099528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.2711413304011027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.11990933616956075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.2736426591873169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.2569813330968221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.12796266873677573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.27587733666102093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.12988266348838806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.12455999851226807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.27643199761708576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.26312534014383954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.2795146703720093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.13395733634630838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.1323199967543284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.28172266483306885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.26950933535893756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.08474133412043254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.16894400119781494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.08666132887204488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.08597333232561748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.1730133295059204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.16474666198094687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.16269866625467935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.07867200175921123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.16471466422080994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.15253333250681558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.16290666659673056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.0787360022465388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.07564799984296162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.16516799728075662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.15471466382344565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.07973866661389668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.16474133729934692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.0792906681696574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.07584000130494435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.16527466972668967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.154858668645223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.16609066724777222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.08115200201670329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.07962133487065633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.16852800051371256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.15875200430552164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.11956266562143962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.11998933553695679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.0625439981619517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.12159466743469238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.06365866462389629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.11982400218645732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.11386133233706157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.06317333380381267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.11960533261299133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.0624533345301946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.11963199575742085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.11365866661071777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.06293333570162456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.12138666709264119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.06374933322270711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.11998933553695679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.11349333326021831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.12153599659601848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.06397333244482677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.12066133817036946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.11372266213099162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,0.6457813183466593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.1771306991577148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,0.6504799922307333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,0.5987039804458618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.1802293459574382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,1.0941332976023357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,0.6573333342870077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.1875733534495037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,0.6596533457438151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,0.6126399834950765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.1896373430887859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,1.1304319699605305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,0.6699413458506266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.2008372942606609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,0.6723732948303223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,0.6402453184127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.2062346935272217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,1.1232799688975017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,0.6932746569315592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.2270666758219402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,0.6976213455200195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,0.6558613379796346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.2325119972229004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,1.1515733400980632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.3860479990641276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,0.6586346626281738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.38547734419504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.36480534076690674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,0.6775466601053873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.6212480068206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.3306399981180827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.6054346561431885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.336736003557841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.3144320050875346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.6091786623001099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.5675520102183024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.3357173204421997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.6106239954630533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.3393973509470622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.31749866406122845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.6257439851760864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.5730186700820923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.34324268500010174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.6205706596374512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.3543413480122884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.3307573397954305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.6218986511230469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.5808906555175781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.3555946747461955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.6313120126724243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.3611253499984741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.33699198563893634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.6363946596781412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.5920639832814535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.20097599426905313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.350544015566508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.20770132541656494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.19858133792877197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.3550293445587158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.3330720067024231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.1732693314552307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.319760004679362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.17537067333857217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.16694400707880655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.32037333647410077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.30357333024342853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.17481066783269247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.3216266632080078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.17817066113154092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.1710933248202006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.32435200611750287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.30611733595530194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.18104000886281332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.3259733319282532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.18345600366592407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.17717333634694418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.32944534222284955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.310970664024353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.1892533302307129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.33604268232981366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.1929653286933899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.18333866198857626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.3386186758677165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.3185439904530843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.11173866192499797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.194650669892629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.11487467090288798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.11226666967074077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.19760000705718994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.18914665778477988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.0983893374602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.17871999740600586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.0976746678352356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.09277866284052531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.18109333515167236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.1686346729596456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.09779199957847595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.1792373259862264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.0937653382619222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.18204265832901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.170799990495046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.09930133819580078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.18137067556381226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.09553600351015727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.1831573247909546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.1727573275566101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.10381333033243816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.18528000513712564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.10565333565076192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.18854933977127075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.18034666776657104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.06650666892528534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.11559999982515971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.06624533236026764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.1176639993985494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.11337600151697795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.0624533345301946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.11185066898663838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.1056106686592102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.06244266529877981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.11151466766993205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.11230933666229248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.1067733367284139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.11218667030334473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.06158400078614553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.11386133233706157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.06409599880377452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.11336533228556316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.06281599899133046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.11570133765538533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.10769599676132202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.08469333251317342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.0813973347345988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.055125330885251365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.08526933193206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.05457599957784017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.08526933193206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.08101333181063335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.054431999723116554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.0545066644748052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.05148266752560934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.08061866462230682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.0851039985815684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.05101866523424784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.08489599823951721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,0.8587253093719482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.26636799176534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,0.8585013548533121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,0.7862559954325358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.2684319814046223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,1.1713813145955403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,0.8752960364023844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.2869386672973633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,0.8767146269480387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,0.8025333086649576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.2882453600565593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,1.1857813199361165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,0.8934773604075114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.3040906588236492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,0.8966933091481527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,0.8360959688822428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.3062986532847087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.2071359952290852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,0.9310399691263834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,1.3331999778747559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,0.9390292962392172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,0.858896017074585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,1.337824026743571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.2551679611206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.49458134174346924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,0.7119786739349365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.504314661026001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.48739198843638104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,0.7206346988677979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,0.6778293450673422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.43061331907908124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.6424213250478109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.4356693426767985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.40324799219767254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.6453493436177572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.6019253333409628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.43779198328653973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,0.6635626554489136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.4427786668141683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.412773331006368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.6544479926427206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.6100000143051147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.45527466138203937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,0.6588639815648397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.45081599553426105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.4232586622238159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.6712586879730225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.6192320187886556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.4611733357111613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,0.6752320130666097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.46795201301574707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.43751466274261475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,0.6880586942036947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.6358666817347208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.257258673508962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.37165331840515137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.2691359917322795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.2566080093383789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.3556053241093953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.37860798835754395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.22217067082722983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.33275200923283893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.22603732347488403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.2139093279838562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.3362773259480794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.3187999924023946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.22625599304835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.3368106683095296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.229802668094635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.21785600980122885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.3433599869410197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.33619733651479083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.23471999168395996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.3450080156326294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.23485867182413736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.22421334187189737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.346341331799825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.3266826669375102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.24054400126139322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.3532319863637288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.24514132738113403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.23131199677785239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.3580426772435506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.3352053165435791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.1381066640218099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.2018400033315023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.14136000474294028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.1376053293546041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.20604799191157022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.19581333796183267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.11773332953453064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.18102933963139853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.1202239990234375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.1144480009873708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.1833440065383911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.17234132687250772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.11797333757082622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.18095467487970987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.12185066938400269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.18317866325378418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.1157973309357961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.17245332400004068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.12251200278600057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.18497065703074136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.12398399909337361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.12026133139928182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.18900267283121744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.12917866309483847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.19026132424672446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.13205333550771078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.12776000301043192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.19476266702016196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.18327999114990234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.07699200014273326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.11343466242154439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.07965333263079326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.11633066336313884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.11342400312423706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.10730133454004924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.06858666737874348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.11502400040626526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.10083199540774028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.07553599774837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.10693867007891338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.10937066872914632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.07291733225186665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.10758933424949646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.07463466624418895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.10917333761850993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.10316800077756245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.0746559997399648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.07769600053628285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.10923199852307637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.10983999570210774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.0484746644894282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.10599467158317566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.07273600002129872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.07270933190981548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.06764799853165944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.07251733541488647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.06863466898600261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.04612799982229868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.07256000240643819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.07447466750939687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.07394133508205414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.07050133248170216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.03843733419974645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.054714664816856384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.03876800090074539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.05470400055249532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.052576000491778054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.054287999868392944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,0.6621386607487997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,0.8616373538970947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.6624000072479248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,0.6113066673278809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,0.8666293621063232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,0.8042399883270264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,0.6862080097198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,0.8888639609018961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,0.685365358988444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,0.6241439978281657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,0.8871626853942871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,0.8160906632741293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,0.7002773284912109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,0.9019253253936768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,0.6967466672261556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,0.6382613182067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,0.9008586406707764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,0.8301119804382324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,0.7236800193786621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,0.9262133439381918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,0.7146506309509277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,0.661898652712504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,0.9195306301116943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,0.8544533252716064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.3875360091527303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.4970346689224243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.38947733243306476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.3701440095901489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.49879467487335205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.47041598955790204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.33293867111206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.43915732701619464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.3355626662572225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.312608003616333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.4402506748835246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.41205334663391113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.33859201272328693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.44310398896535236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.34116268157958984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.32076799869537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.44494398434956867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.41908268133799237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.3476159969965617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.45339731375376385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.35037867228190106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.32707200447718304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.45526401201883954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.4267786741256714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.35893865426381427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.4668639898300171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.36152533690134686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.33880531787872314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.46698665618896484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.43746666113535565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.20138667027155557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.2600959936777751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.2055306633313497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.19662932554880777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.2635040084520976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.2490346630414327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.16899732748667398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.22402666012446085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.1709973414738973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.16521599888801575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.2262079914410909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.21792000532150269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.17191465695699057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.22858132918675741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.17316800355911255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.16714666287104288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.22883733113606772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.21996800104777017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.17929067214330038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.23586666584014893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.18026665846506754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.17402132352193198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.2366080085436503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.22882133722305298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.1880693236986796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.24566932519276938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.19036267201105753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.18098666270573935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.24651734034220377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.23390400409698486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.10805333654085796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.14056000113487244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.11110400160153706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.1095413366953532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.14245866735776266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.13915200034777322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.09322133660316467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.12551466623942056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.09377599755922954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.08694400389989217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.12589866916338602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.11743999520937602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.09380799531936646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.12581866979599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.09510933359464009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.0888320008913676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.12590932846069336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.1258240044116974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.09741866588592529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.09318400422732036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.12866666913032532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.12319999933242798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.09936533371607463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.1301706631978353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.10099732875823975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.09924266735712688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.13196800152460733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.12762666742006937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.060415998101234436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.08185066779454549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.06383466720581055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.08211733400821686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.056741332014401756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.07774400214354198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.055861334005991616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.08003733555475871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.08029866715272267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.07508799930413564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.058304001887639366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.07896533111731212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.060032000144322716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.05575466652711233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.0790293316046397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.05872533222039541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.08285866677761078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.07876800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.044026667873064675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.05586666862169901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.057578667998313904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.05462400118509928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.05421866476535797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.05557866891225179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.0544053316116333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.05629333357016245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.04301866888999939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.05648533503214518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.032298666735490165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.044453332821528115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.04252266883850098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.03417066733042399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.033930666744709015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.0443200021982193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.044453332821528115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,0.7744906743367513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,0.905407985051473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,0.7707946300506592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,0.7222346464792887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,0.8989760080973307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,0.8444320360819498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,0.7865546544392904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,0.9123733043670654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,0.7799680233001709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,0.7251413663228353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,0.9067626794179281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,0.8548906644185384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,0.8029706478118896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,0.9300639629364014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,0.7940639654795328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,0.8468693097432455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,0.927893320719401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,0.9696319897969564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,0.7840853532155355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,0.9157813390096029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,0.7728213469187418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,0.821674664815267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,0.9050666491190592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,0.9525226751963297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.4119360049565633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.48233600457509357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.4055360158284505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.4120853344599406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.4743306636810303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.47764265537261963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.3980906804402669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.46298666795094806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.39667201042175293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.36899733543395996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.4623253345489502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.43001067638397217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.40268266201019287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.46885331471761066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.40117335319519043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.37350932757059735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.4662453333536784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.43561065196990967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.4105120102564494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.4766026735305786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.4094293514887492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.4148799975713094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.4749866724014282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.4747786521911621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.4010453224182129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.4696906805038452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.39683731396993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.40406401952107746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.46400535106658936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.4675519863764445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.21588265895843506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.2531999945640564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.21370132764180502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.2174933354059855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.25116799275080365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.2518293261528015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.2087519963582357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.2432533303896586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.2077173391977946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.1942453384399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.24221332867940268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.2265066703160604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.21154133478800455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.24463466803232828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.20988800128300986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.19734932978947958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.2448373238245646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.23061867554982504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.21396799882253012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.25081600745519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.2142560084660848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.20985066890716553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.25041600068410236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.24329066276550293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.2108586629231771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.2462666630744934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.20973867177963257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.2082293430964152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.24498667319615683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.24305067459742227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.1190880040327708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.116757333278656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.11932800213495891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.1397279997666677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.13988266388575235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.1127839982509613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.1322826643784841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.10523200035095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.13211199641227722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.12408000230789185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.11226133505503337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.13306132952372232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.1123413344224294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.10731200377146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.13427199920018515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.12770666678746542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.11575999855995178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.13666133085886636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.11471466223398845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.11311999956766765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.1360319952170054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.13329600294431052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.11597333351771037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.13818666338920593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.11451733112335205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.11301866173744202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.13554666439692178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.13290133078893027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.0681279997030894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.07842133442560832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.08072000245253245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.06591466565926869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.07668800155321757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.07650133470694225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.06249066690603892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.07230400045712788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.06559999783833821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.0767146646976471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.07566933333873749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.05018133421738943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.051781331499417625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.05003199974695841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.05187733471393585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.050186668833096824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.05086400111516317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.028149334092934925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.034160000582536064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.03384533276160558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.028010666370391846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.03396799912055334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.03245333333810171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,0.7538293202718099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,0.7622186342875162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,0.7518613338470459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,0.7005226612091064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,0.7562879721323649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,0.7080427010854086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,0.7642133235931396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,0.7701653639475504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,0.7584640185038248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,0.7070133686065674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,0.7657120227813721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,0.7195093631744385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,0.7798933188120524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,0.7878506978352865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,0.7752106984456381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,0.8195466995239258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,0.7803680102030436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,0.8317279815673828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,0.7609653472900391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,0.7712213198343912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,0.7501546541849772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,0.7974346478780111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,0.7586186726888021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,0.8130506674448649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.39986133575439453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.40618666013081867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.3914506832758586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.40109864870707196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.39661868413289386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.40642666816711426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.38788799444834393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.3911306858062744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.3840320110321045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.35733866691589355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.3850933313369751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.36271464824676514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.39217066764831543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.3927146593729655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.38891200224558514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.36338667074839276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.3928053379058838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.3678079843521118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.3998986482620239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.403162678082784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.39823468526204425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.4072959820429484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.3997386693954468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.4097013473510742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.38978668053944904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.3938560088475545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.386133352915446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.39578131834665936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.39028799533843994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.396016001701355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.21109867095947266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.21403199434280396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.20843732357025146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.2118026614189148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.2097813288370768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.21361599365870157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.20245333512624106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.20360000928243002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.202074666817983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.18923733631769815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.20339200894037882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.19121599197387695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.2050186594327291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.20661866664886475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.20458666483561197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.1912426749865214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.20545599857966104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.19349332650502524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.20905067523320517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.21083199977874756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.20254399379094443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.21144000689188638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.2068106730779012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.20435200134913126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.20646933714548746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.20266133546829224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.20251200596491495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.2057653268178304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.20631466309229532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.11591999729474385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.11768533786137898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.11548800269762675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.11746666828791301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.1176533301671346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.11965333422025044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.10921600461006165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.10955199599266052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.10160000125567119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.1113866666952769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.10505066315333049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.11145066221555074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.11316800117492676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.10958932836850484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.10433600346247356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.11373333136240642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.114464004834493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.1135040024916331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.11152533690134685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.11506666739781697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.11365333199501038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.1141973336537679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.10983467102050781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.11160533626874287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.11265066266059875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.06422933439413707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.06505600114663442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.06318933268388112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.06625066697597504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.06585066517194112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.06705066561698914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.06446399788061778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.06237333516279856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.06283733248710632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.06075733403364817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.06306133170922597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.06453333298365276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.06453333298365276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.06363733112812042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.06486399968465169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.06300800045331319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.06317866841952006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.06496533254782359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.0646613339583079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.043162668744723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.04045333216587702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.043290664752324425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.04334400097529093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.04426133135954539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.040463998913764954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.04409066836039225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.04207466542720795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.04359999795754751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.3565760056177775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.3500106732050578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.3516800006230672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.32683199644088745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.3461759885152181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.3243199984232585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.3622453212738037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.3550399939219157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.3598666588465373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.33515731493632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.3514133294423421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.3306186596552531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.37218133608500165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.36744534969329834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.37138132254282635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.3765546480814616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.3651306629180908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.36894933382670086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.3622613350550334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.3545440038045247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.3598346710205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.3693759838740031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.354202667872111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.36367468039194745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.19777599970499674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.19413334131240845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.19357333580652872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.197434663772583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.19138665994008383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.19537599881490073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.18666134277979532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.18303465843200684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.1837600072224935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.1749653418858846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.18199467658996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.1711840033531189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.1914506753285726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.18755199511845908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.18827199935913086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.17721599340438843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.18531733751296997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.17497599124908447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.19723733266194662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.19351466496785483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.19323732455571493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.18942399819691977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.19219734271367392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.1861120065053304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.19153066476186117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.1891253391901652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.18859734137852988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.18808533747990927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.18604799111684164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.18385066588719687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.10693333546320598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.10972266395886739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.10556800166765849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.10105066498120625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.09902933239936829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.09393067161242168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.09738133351008098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.09267733494440715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.10220799843470256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.10115200281143188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.10114133358001709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.09847999612490337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.09934399525324504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.10636267066001892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.10524266958236694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.10653866330782573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.10460799932479858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.10126933455467224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.10077333450317383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.10352533062299092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.10225600004196167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.10317333539326985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.10408000151316325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.10124267141024272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.10131200154622395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.05857066810131073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.06346666812896729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.057962665955225624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.058975999553998314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.05880000193913778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.058464000622431435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.05853333572546641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.05926933387915293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.05813866853713989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.05853333572546641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.03888533264398575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.03786666691303253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.020554666717847187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.19747199614842734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.19750400384267172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.19549334049224854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.19132800896962485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.19591466585795084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.19205333789189658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.19882667064666748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.19794134298960367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.197269340356191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.1861706574757894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.1979466676712036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.18646933635075888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.20178133249282837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.20381865898768106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.19900266329447427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.19849600394566855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.19995200634002686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.19874133666356406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.2013439933458964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.20158400138219199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.20138132572174072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.19894933700561523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.20198933283487955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.19910933574040732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.1129919985930125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.11300266782442729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.11081600189208984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.11336533228556316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.1111893355846405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.11370666821797688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.1053493320941925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.10643733541170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.10543466607729594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.10154133041699727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.10412266850471497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.10111467043558757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.10851732889811198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.10499733686447144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.10150399804115295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.10538666447003682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.10230400164922078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.10920000076293945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.10916266838709514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.10937600334485371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.11011733611424764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.11085333426793416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.1097920040289561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.10983999570210774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.10870933532714844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.06084799766540527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.06385066608587901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.058378666639328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.05835199852784475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.05889600018660227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.05713599920272827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.060165335734685264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.06012799839178721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.05969599882761637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.060421332716941833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.027823999524116516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.027727998793125153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.017770666629076004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.14196800192197165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.14224533240000406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.14224533240000406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.13396799564361572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.14086932937304178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.1341333289941152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.1420906682809194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.14222400387128195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.14033599694569907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.1334826648235321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.1421066621939341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.13372266292572021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.14486400286356607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.14456533392270407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.14416000247001648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.14152533809343973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.1442400018374125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.14074666301409403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.1461120049158732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.14588266611099243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.14403733611106873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.13983466227849325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.14617066582043967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.14064000050226846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.08077866832415263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.07963733375072479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.08017066617806752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.08057599763075511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.08081066608428955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.07707199951012929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.07747200131416321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.0739573339621226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.07845866680145264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.07870933413505554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.07909866670767467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.07905599971612294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.07858133316040039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.07463466624418895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.0787360022465388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.07748266557852428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.07974933087825775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.07689066727956136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.0780320018529892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.07653333246707916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.04758933186531067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.04621333380540212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.04890666902065277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.04613333443800608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.046997333566347756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.04814933240413666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.046869332591692604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.04794666667779287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.04773333172003428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.0308746670683225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.03073599934577942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.03107733279466629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.11386133233706157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.11376532912254333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.11180266737937927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.10532266894976298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.1135040024916331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.10590933760007222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.11351999640464783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.11374400059382121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.11180800199508667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.10541333754857381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.11332799990971883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.10545066992441814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.11552000045776367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.11574400464693706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.11379733681678772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.10744532942771912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.11365866661071777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.10868799686431885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.11400000254313152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.11371733744939168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.11338667074839275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.10841600100199382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.11377066373825073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.10923733313878377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.06675733129183452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.06622933348019917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.06633066634337108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.06611733138561249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.06622399886449178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.06618666648864746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.06423999865849812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.06136000156402588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.06144533554712931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.06640533109505971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.0665280024210612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.06612800061702728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.06643733382225037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.06637866795063019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.06635733445485432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.06401066482067108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.06508799890677135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.062421331803003945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.03924266745646795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.03817066550254822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.03930133332808813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.19754666090011597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.10097600022951762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.10113599896430969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.09937600294748943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.09529067079226176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.09924266735712688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.09425066908200581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.10110933581988017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.1011946698029836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.1011306643486023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.09332799911499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.0932426651318868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.0971999963124593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.10097066561381023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.10140267014503479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.1013706624507904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.09723200400670369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.10101333260536194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.09704533219337463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.057562669118245445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.05771199862162272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.05861333509286245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.05422399938106537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.05690666536490122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.05805333455403646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.05435733497142792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.034330666065216064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.026901334524154663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.026549334327379864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.26852800448735553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,0.69268266359965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,4.058815956115723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,0.7258133093516032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,0.6387733221054077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,4.06825606028239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,3.7468268076578775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,0.7139093081156412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,4.074192047119141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,0.7142079671223959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,0.6752106348673502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,4.085477193196614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,3.7623573939005532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,0.724671999613444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,4.0954132080078125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,0.7327733039855957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,0.6743520100911459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,4.103871981302897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,3.784133275349935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.41495466232299805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,2.1553014119466147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.42450666427612305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.3951840003331502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,2.165775934855143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,1.999301274617513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.3715680042902629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,2.104661305745443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.3752373456954956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.3444586594899495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,2.1091787020365396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,1.9464693069458008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.3763253291447957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,2.110106627146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.38038400808970135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.3506400187810262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,2.114192008972168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,1.9520799318949382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.38526399930318195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,2.119765281677246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.3885759909947713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.35814932982126874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,2.124821345011393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,1.9615467389424641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.23540266354878744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,1.1567359765370686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.24108266830444336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.22721600532531738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,1.162607987721761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,1.0766293207804363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.21176000436147055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,1.1285759607950847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.21228800217310587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.1995946764945984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,1.1303306420644124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,1.0480373700459797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.21432000398635864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,1.1302613417307537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.21603200833002725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.21011734008789062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,1.1317013104756672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,1.0510986646016438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.2214720050493876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,1.1412266890207927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.22192533810933432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.2078346610069275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,1.139514684677124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,1.055999994277954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.166810671488444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.6798559824625651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.1669386625289917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.15677332878112793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.6786879698435465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.6259520053863525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.1648426651954651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.6746719678243002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.16454399625460306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.15370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.6747252941131592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.6265706618626913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.1646666626135508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.6742773056030273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.16482133666674295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.15455466508865356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.673408031463623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.6248319943745931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.1649066706498464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.6747146447499593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.16590399543444315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.1552853286266327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.6989386876424154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.6276533206303915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.5251893202463785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,2.4191999435424805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.5288373231887817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,0.47967998186747235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,2.4209386507670083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,2.2319893836975098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.5307039817174276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,2.427349408467611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.536512017250061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,0.4903626839319865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,2.432783921559652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,2.242250601450602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.5429493188858032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,2.4415626525878906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.5500106811523438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,0.5056213140487671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,2.448570728302002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,2.2577706972757974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.3158986568450928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,1.3071520328521729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.3235039909680684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.3019413352012634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,1.3136746883392334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,1.2127306461334229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.2821386655171712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,1.2675573031107585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.285589337348938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.26310932636260986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,1.2702720165252686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,1.174021323521932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.28568534056345624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,1.2726666927337646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.28763200839360553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.26713067293167114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,1.2744426727294922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,1.1788853009541829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.2935520013173421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,1.2801067034403484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.2977919975916545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.275653342405955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,1.2837440172831218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,1.1851413249969482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.1793173352877299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,0.7157706419626871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.1846826672554016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.17497599124908447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,0.7195573647816976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,0.6675253709157308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.1590826710065206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,0.6913653214772543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.16058133045832315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.15200000007947287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,0.6934453646341959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.6435306469599406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.16268799702326456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.04610666632652283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.16402133305867514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,0.6945707003275553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.1553439994653066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,0.6981759866078695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.6494986613591512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.16631999611854553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.16873600085576376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,0.6986719767252604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.16019200285275778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,0.7014773686726888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.6536106665929159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.12567999958992004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.4336906671524048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.1253439982732137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.11941333611806233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.43649065494537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.40480534235636395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.1239413321018219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.43033599853515625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.12378666798273723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.11769066254297893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.43219200770060223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.40190398693084717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.12443733215332031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.43139199415842694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.12422933181126912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.11767466862996419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.4327733516693115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.40036265055338544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.12367467085520427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.43118401368459064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.12410666545232137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.11746133367220561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.4321013291676839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.40059200922648114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.4383893410364787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.44355201721191406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.4044693311055501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,1.7536640167236328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,1.6375679969787598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,1.75928529103597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.44434134165445965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.4482560157775879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,1.762122631072998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.40881065527598065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,1.76583464940389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,1.6255626678466797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.4535200198491414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.46000532309214276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,1.7735840479532878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.42178666591644287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,1.7815732955932617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,1.6391733487447102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.2633440097173055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.27531200647354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,0.9599466323852539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.2526560028394063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,0.9663200378417969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,0.8922186692555746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.23403199513753256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.23672000567118326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,0.9260853131612142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.22035733858744302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,0.9269013404846191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,0.8834239641825358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.2381920019785563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.24661332368850708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,0.9284853140513102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.22721600532531738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,0.9324586391448975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,0.8619679609934489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.24484266837437949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.24799466133117676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,0.9352746804555258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.230186661084493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,0.9409013589223226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,0.8711253007253011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.15427199999491373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.5337599913279215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.16077867150306702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.15001599987347922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.5353759924570719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.49945068359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.1360266705354055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.13776533802350363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.5124533176422119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.12783466776212057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.5148213307062784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.47596800327301025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.13800000150998434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.1397599975268046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.5146293242772421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.12988266348838806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.5160053173700968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.49820268154144287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.14138133327166238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.14384532968203226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.5191146532694498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.13595199584960938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.519045352935791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.4842453400293986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.11145599683125813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.33132266998291016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.11153067151705424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.1055573324362437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.3320586681365967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.3081279993057251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.11098666985829671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.10955733060836792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.32897599538167316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.3299466570218404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.3076106707255046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.10964799920717876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.32846933603286743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.10935466488202412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.1032960017522176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.3300693432490031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.3058826724688212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.11123733719189961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.3306453426678975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.10958932836850484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.10347200433413188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.32842665910720825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.30692799886067706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,0.6770026683807373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,0.6828213532765707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,2.3112319310506186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,0.6201706727345785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,2.12501859664917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,2.3183679580688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,0.6890453497568766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,0.6961332956949869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,2.3258506457010903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,0.6351733207702637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,2.140181382497152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,2.331535975138346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,0.7207252979278564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,0.7135146458943685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,2.3477706909179688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,0.6562720139821371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,2.354325294494629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,2.1677707036336265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.398906668027242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.4084426561991374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,1.2474026679992676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.3794826666514079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,1.256602684656779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,1.15775465965271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.352944016456604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.3633013168970744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,1.195199966430664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.3264533281326294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,1.1982666651407878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,1.1031893094380696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.35793598492940265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.36208534240722656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,1.2015360196431477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.3328746755917867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,1.2035199801127117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,1.1114880243937175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.3673386573791504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.37115732828776044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,1.2111999988555908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.34112000465393066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,1.217077334721883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,1.1170399983723958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.21776533126831055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.2223200003306071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,0.6667466958363851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.20844266812006632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,0.672655979792277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.6217866738637289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.19157866636912027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.19709332784016928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,0.6350186665852865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.1795039971669515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,0.6378080050150553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.5940800110499064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.1934880018234253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.1955946683883667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,0.6395359834035238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.18435200055440268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,0.6424053510030111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.5962026516596476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.2016106645266215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,0.6470933357874552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.2069973349571228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.19051200151443481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,0.6483946641286215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.6027946472167969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.37555734316507977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.12946133812268576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.12353600064913432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.3962026834487915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.3547626733779907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.36154667536417645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.11555733283360799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.36346666018168133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.3330079913139343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.11321066816647847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.36241598924001056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.11528000235557556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.10719466209411621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.36209599177042645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.33486934502919513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.11525332927703857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.365392009417216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.11938132842381795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.1120853324731191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.36771198113759357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.3375733296076457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.09021866321563721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.2399466633796692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.09101866682370503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.0867680013179779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.2403306762377421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.22405334313710532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.23921066522598267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.09097066521644592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.08483200271924336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.2407146692276001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.22403200467427573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.08910399675369263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.2387253244717916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.0906933347384135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.2398080031077067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.2239146629969279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.0888266662756602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.24032533168792725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.09083200494448344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.08492799599965413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.24061334133148193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.2239146629969279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.511247992515564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,1.4149813652038574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.5150933265686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,0.4660319884618123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,1.422159989674886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,1.303653319676717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.5182933410008749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,1.4263893763224285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.5238826672236124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,0.4943893353144328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,1.432213306427002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,1.3151520093282063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.5310133298238119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,1.4409599304199219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.5371786753336588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,0.4922186533610026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,1.4472692807515461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,1.3311200141906738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.3047893246014913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,0.7799466451009115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.3104693293571472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.2887466748555501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,0.786672035853068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,0.7260586420694987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.2672213315963745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,0.7385226885477701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.2705013354619344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.24869867165883383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,0.7434026400248209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,0.6851786772410074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.2714186708132426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,0.7429172992706299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.2752373417218526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.2549920082092285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,0.7485439777374268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,0.6911253134409586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.27953600883483887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,0.7535999615987142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.2853599985440572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.27109867334365845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,0.7591573397318522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,0.6954186757405599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.16562666495641074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.423311988512675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.17037334044774374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.16065067052841187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.42929065227508545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.3975733518600464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.14236266414324442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.40035200119018555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.1453439990679423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.13455466429392496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.4026133219401042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.37187735239664715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.14469866951306662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.4014773368835449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.13857600092887878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.40478400389353436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.3753546476364136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.42451198895772296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.1583573321501414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.14603199561436972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.41256535053253174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.38095466295878094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.09469866752624512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.24466667572657266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.09631466865539551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.0976746678352356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.2582613428433736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.23412799835205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.0853653351465861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.23502933979034424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.08085333307584126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.23668799797693887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.21974400679270426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.08734933535257976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.23639466365178427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.08721066514650981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.08130133152008057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.23669866720835367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.21988266706466675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.2383520007133484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.08277333279450734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.2389813264211019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.22195200125376383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.16792533795038858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.06443200012048085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.16766933600107828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.15569600462913513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.06864533325036366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.1666933298110962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.068271999557813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.16730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.15587733189264932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.06814399858315785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.1665013333161672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.07014399766921997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.06613333523273468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.1669493317604065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.15640532970428467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.06853333115577698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.16683199008305868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.06877333422501881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.06469866633415222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.16752000649770102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.15634666879971823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,0.6775626341501871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,1.4281919797261555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,0.7035626570383707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,0.6153279940287272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,1.436255931854248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,1.312725305557251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,0.6873493194580078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,1.4467573165893555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,0.6928799947102865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,0.629472017288208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,1.4519359270731609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,1.3283627033233643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,0.7099040349324545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,1.4706400235493977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,0.7115093072255453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,0.6530773242314657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,1.475338617960612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,1.3499679565429688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.39270933469136554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,0.7878986994425455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.39828264713287354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.3712746699651082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,0.7950613498687744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,0.7324533462524414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.34592000643412274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,0.7365813255310059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.34811198711395264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.31836267312367755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,0.7386559645334879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,0.6785919666290283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.35146665573120117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,0.7406187057495117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.35470934708913165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.3367893298467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,0.7468213240305582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,0.6832533677419027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.360586682955424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,0.7522613207499186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.3640906810760498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.3333599964777629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,0.7550346851348877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,0.6930186748504639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.20958399772644043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.4191146691640218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.21474132935206094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.19988266626993814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.42207467555999756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.39110398292541504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.18120000759760538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.3882346550623576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.1844266653060913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.1724053422609965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.3896106481552124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.36243200302124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.1851466695467631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.3928426504135132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.18729066848754883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.17663466930389404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.39506133397420246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.36770133177439374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.1918506622314453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.3992053270339966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.19357866048812866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.1811786691347758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.40167466799418133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.3753066857655843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.11606933673222859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.23289066553115845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.11989866693814595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.11539199948310852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.2348533272743225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.22194133202234903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.2185386617978414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.10646399855613708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.09593600034713745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.22074133157730103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.20337599515914917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.10374400019645691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.21904534101486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.09661333759625752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.22216532627741495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.20379199584325156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.10578133662541707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.22130133708318075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.22408533096313477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.20787733793258667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.06851733227570851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.14145599802335104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.0702453354994456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.06825600067774455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.14350400368372598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.13405866424242655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.06454400221506755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.13805866241455078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.06449066599210103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.13754666845003763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.12594133615493774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.13614933689435324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.06454400221506755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.138154665629069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.12626133362452188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.06557866434256236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.13795733451843262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.13874666889508566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.1280693312486013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.05452266832192739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.09911466638247173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.056330665946006775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.09943999846776326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.09311999877293904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.09916266798973083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.05226133267084757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.09325333436330159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.0992746651172638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.05445333321889242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.10108799735705058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.09303466478983562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.05455466608206431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.0993226667245229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.10107733805974324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.09310932954152425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.5076213280359904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,0.9150880177815756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.5108960072199503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,0.46348798274993896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,0.9189919630686442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,0.8364799817403158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.5345653295516968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,0.9192372957865397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.5186506509780884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,0.4912319978078206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,0.9234346548716227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,0.8479733467102051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.5273013512293497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,0.9337120056152344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.531269351641337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,0.5036266644795736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,0.9396533171335856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,0.8627893129984537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.298581341902415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.5144000053405762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.3050453265508016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.28382933139801025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.5207839806874593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.480954647064209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.2643839915593465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.475109338760376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.2646399935086568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.2435200015703837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.47630401452382404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.4379253387451172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.26639999945958454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.4986720085144043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.26901866992314655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.24809066454569498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.4806346495946248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.44630932807922363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.2741333246231079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.48762667179107666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.2780746618906657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.25650133689244586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.48955734570821124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.4506453275680542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.16060266892115274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.2775733272234599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.16409599781036377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.15455999970436096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.2814026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.26124799251556396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.13640532890955606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.2526666720708211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.13889066378275552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.12997866670290628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.2532320022583008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.2355146606763204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.13825066884358725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.2544800043106079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.1420746644337972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.13403733571370444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.25597333908081055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.24055467049280801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.146096001068751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.26119999090830487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.1474399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.26282666126887005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.246671994527181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.08679999907811482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.15625066558519998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.09083200494448344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.08902399738629659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.16022933522860208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.15172800421714783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.14802133043607077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.08057599763075511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.1485973298549652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.136245330174764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.07875200112660725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.14820266763369241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.08088533580303192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.15042133132616678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.13795733451843262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.08060800035794576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.14918933312098184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.08109333117802937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.15215466419855753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.14036267002423605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.09935999910036723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.1016533374786377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.09528533617655437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.09730133414268494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.09756267070770264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.05056533217430115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.09742933511734009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.04814399778842926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.09726400176684062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.09074133634567261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.09896533687909444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.04958933095137278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.09853866696357727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.09121599793434143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.0684746652841568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.07022400200366974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.06667733192443848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.04208533465862274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.06929066777229309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.042591998974482216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.0688213308652242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.06493866443634033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.0689333329598109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.04233600199222565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.06593066453933716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.041333332657814026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.06854933500289917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.07028266787528992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.0660693347454071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,0.706922690073649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,1.0227733453114827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,0.7075573603312174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,0.6317973136901855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,1.0220906734466553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,0.9251946608225504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,0.7214773495992025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,1.0343466599782307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,0.7191893259684244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,0.6387199958165487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,1.0298773447672527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,0.947370688120524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,0.7388479709625244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,1.0537066459655762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,0.7333599726359049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,0.679807980855306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,1.0476640065511067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,0.9540853500366211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.4022879997889201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.567418654759725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.4034026861190796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.3762933413187663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.5692640145619711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.5293013254801432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.3601813316345215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.5296213229497274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.3521973292032878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.3218239943186442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.5139093399047852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.47226134936014813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.35659201939900714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.5183146794637045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.35769065221150714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.3285920023918152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.5210560162862142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.478005329767863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.3675413529078166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.5477386713027954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.3702826499938965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.3387306531270345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.5312960147857666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.4891519943873088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.21029333273569742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.30025599400202435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.21693867444992065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.199178675810496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.30002667506535846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.27908267577489215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.1795413295427958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.2655093272527059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.18101867039998373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.16916267077128092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.26661866903305054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.249834676583608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.18297600746154785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.2695359985033671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.1860640048980713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.17287999391555786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.2690773407618205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.2526719967524211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.1901706655820211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.27609066168467206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.1929439902305603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.18120533227920532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.27747199932734173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.26014933983484906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.11407466729482015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.16331199804941812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.1160586675008138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.11145599683125813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.1650773286819458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.15636266271273294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.09726933638254802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.14645333091417947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.09961066643397014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.09080533186594646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.14878400166829428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.13645333051681519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.099263995885849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.1470026671886444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.1018506685892741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.0934879978497823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.15095999836921692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.13706666231155396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.10156800349553426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.15075733264287314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.09759466846783955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.15287466843922934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.14268799622853598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.09541333715120952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.06685866912206014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.09710933764775594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.09109866619110107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.09178666273752849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.09117866555849712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.08338133494059245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.09233599901199341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.06221333146095276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.09149332841237386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.08469866712888081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.09250133236249287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.06250133117039998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.09321066737174988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.04690133531888326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.06381866832574208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.06426666676998138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.03402133285999298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.050288001696268715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.04886933167775472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.05037866532802582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.05217599868774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.050714666644732155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.5037866830825806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,0.6597493489583334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.5069226821263632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,0.4606613318125407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,0.6618080139160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.6064960161844889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.5191786686579386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,0.6717600027720133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.520799994468689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,0.47233064969380695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,0.6765600045522054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.6178986628850301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.527621348698934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,0.6858239968617758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.5353920062383016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,0.4870719909667969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,0.6887040138244629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.6310880184173584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.2959146698315938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.38230399290720624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.305893341700236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.286463995774587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.3908160130182902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.3637760082880656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.2582826614379883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.33958399295806885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.2609279950459798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.23983466625213623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.343664010365804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.31888000170389813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.26311999559402466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.3433973391850789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.26666667064030963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.24681599934895834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.34783466657002765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.3236959973971049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.2712000012397766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.3524640003840129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.2751893401145935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.25281065702438354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.35835198561350506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.33033066987991333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.1586720049381256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.20503999789555868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.1627893348534902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.15447466572125754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.20961600542068481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.1972000002861023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.13404267032941183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.17860267559687296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.1360373298327128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.1269866625467936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.18206934134165445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.1695893406867981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.1362399955590566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.1814240018526713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.14012266198794046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.13035733501116434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.18524799744288126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.1730133295059204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.14149866501490274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.18686399857203165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.144896000623703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.13803199927012125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.1893493334452311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.17940799395243326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.08563733100891113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.1128480037053426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.08893866340319316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.08684800068537395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.11559999982515971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.11214933792750041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.07500799993673961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.10267200072606404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.07053333520889282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.1051573355992635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.09693333506584167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.07627200086911519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.1032319962978363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.07885866860548656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.07249600191911061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.10556800166765849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.0972106655438741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.07765333354473114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.10523733496665955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.07916266719500224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.0729066679875056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.10744000474611919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.07062933345635732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.07228800157705943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.06613866488138835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.06829866766929626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.04409066836039225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.06851733227570851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.07047999898592631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.04432533184687296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.04470933477083842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.03234666585922241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.045456002155939736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.04418666660785675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.04266133407751719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.5906560023625692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,0.6869493325551351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.5890506505966187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,0.5436426798502604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,0.6878346602121989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.6375093460083008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.6023093461990356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,0.703333298365275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.6031200091044108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,0.5756906668345133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,0.7000693480173746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.6701760292053223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.6071146726608276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,0.7087146441141764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.6039679845174154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,0.5819413264592489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,0.7045760154724121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,0.6731786727905273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.333840012550354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.3877386649449666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.3273013234138489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.32343467076619464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.3835413455963135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.372655987739563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.3042400081952413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.35469333330790204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.3050453265508016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.2839306592941284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.3558613459269206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.3301066756248474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.3115893403689067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.36290132999420166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.31117333968480426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.2983040014902751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.3620213270187378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.3460640112559001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.31301865975062054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.36510932445526123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.31278399626413983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.3009919921557109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.3657386700312297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.3509440024693807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.17705066998799643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.20641599098841348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.1750453313191732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.17197332779566446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.20282665888468424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.19805334011713663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.16077867150306702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.18964266777038574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.16104533274968466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.15050666530927023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.18863467375437418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.17787200212478638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.16350932916005453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.19117865959803262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.16365333398183188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.15688000122706094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.1925920049349467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.18421866496404013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.16596266627311707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.19435199101765951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.16697599490483603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.16168000300725302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.1936533252398173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.18716800212860107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.09797867139180501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.11573867003122966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.09636800487836202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.09692800045013428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.11529067158699036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.11373333136240642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.09102400143941243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.1074773371219635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.08970666925112407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.08201600114504497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.10739200313886006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.09822400410970052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.09052266677220662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.10825600226720174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.09108799695968628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.10826667149861653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.10135466853777568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.09170666337013245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.1088853379090627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.09104532996813457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.08672533432642619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.107424000898997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.10216533144315083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.06833066542943318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.06571733454863231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.06594133377075195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.06479466458161671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.06569066643714905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.061797335743904114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.05628266433874766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.06673066814740498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.06656533479690552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.04731200138727824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.04101333270470301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.03915733347336451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.04669333497683207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.04454400142033895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.044480000933011375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.04243200023969015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.03839466720819473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.046485334634780884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.03428266694148382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.027637332677841187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.03212266663710276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.027727998793125153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.5718986590703329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.5766559839248657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.5698293447494507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,0.524341344833374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.5754613478978475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.5316746632258097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.5845599969228109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.5900746583938599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.5814186731974283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,0.5539413293202718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.5894560019175211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.5608160098393759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.5890080134073893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.5947626829147339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.5844320058822632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,0.558357318242391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.5916959842046102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.5669973293940226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.3232266704241435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.32785600423812866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.31755733489990234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.31089067459106445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.3222026626269023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.3166399995485942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.29388266801834106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.2980746626853943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.2951200008392334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.2714453339576721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.2998773256937663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.2758400042851766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.30216532945632935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.3042400081952413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.3001493414243062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.286901334921519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.30341867605845135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.291706661383311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.303658664226532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.305456002553304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.30185600121816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.2876533269882202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.30593599875768024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.2924586733182271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.17254400253295898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.17570666472117105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.1681013305981954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.1646613379319509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.17119999726613364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.16849066813786825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.1570133368174235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.15759999553362528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.15541332960128784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.144405335187912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.15795200069745383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.14802133043607077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.15849600235621134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.16051733493804932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.15772799650828043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.14935466647148132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.1593706707159678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.15308800339698792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.16217066844304404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.1637493371963501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.16087466478347778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.15550933281580606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.16185067097345987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.1563040018081665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.09674132863680522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.0972106655438741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.09499733646710713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.09316266576449077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.09566400448481242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.08975999553998311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.08974400162696838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.08846400181452434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.08827733000119527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.08097599943478902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.0885813335577647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.08875200152397156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.0830080012480418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.08937066793441772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.08989333113034566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.09077333410580952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.08973333239555359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.08525333801905315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.08948266506195068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.08549867073694865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.0580320010582606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.05462400118509928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.05340800185998281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.05046399931112925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.05559466779232025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.054431999723116554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.05563733478387197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.05101333558559418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.05468266705671946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.05486933390299479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.05436266462008158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.05470400055249532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.037903999288876854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.03694933404525121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.038618666430314384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.03658133248488108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.27688000599543255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.27162667115529376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.27748799324035645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.2546880046526591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.271232008934021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.2500106692314148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.2845333417256673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.27806933720906574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.2832319935162862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.26925865809122723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.27663999795913696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.2635040084520976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.28406933943430585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.27850667635599774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.28249067068099976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.27215466896692914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.27619733413060504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.2661973237991333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.1616320013999939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.15869333346684775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.1590026617050171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.15823466579119363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.15617066621780396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.15427733461062113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.14793599645296732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.14483732978502908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.1497760017712911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.13583999872207642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.1460479994614919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.13412800431251526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.14967466394106546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.14739200472831726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.15050666530927023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.1415733297665914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.14809067050615946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.1381653348604838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.15131733814875284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.1497813363869985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.15179199973742166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.14783466855684915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.15029333035151163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.1442346672217051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.09123733639717102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.0893440047899882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.09037866195042928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.08961600065231323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.08917867143948872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.08323733508586884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.08227199812730153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.0843999981880188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.07706666489442189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.08502933382987976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.0767680009206136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.08382933338483174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.08290666838486989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.083146666487058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.07704000174999237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.08292266726493835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.07686399916807811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.086517333984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.08449066678682964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.08428266644477844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.08364267150561015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.08029866715272267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.05041066805521647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.05082133412361145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.05030400057633718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.04845866560935974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.05080533524354299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.04712533454100291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.04704533517360687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.051130667328834534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.046442667643229164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.04959466556708018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.03713600089152654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.03498666733503342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.03429333368937174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.03716266651948293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.03612266729275385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.15491200486818948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.15496533115704855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.15429866313934326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.14713066816329956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.1548640032609304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.14569066961606345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.15653866529464722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.1560533344745636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.15618133544921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.15130666891733804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.15618133544921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.1514240006605784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.15839466452598572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.15863466262817383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.15772799650828043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.1562986671924591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.1579253375530243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.156031996011734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.09285333752632141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.09324799974759419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.09284800291061401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.09372799595197041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.09083200494448344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.09358400106430054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.08517332871754964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.0848533312479655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.08095466593901317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.08659199873606364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.08203200002511342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.08565333485603333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.08711466193199158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.08530132969220479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.08162133395671844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.08556266625722249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.08658132950464885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.0869760016600291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.0846560001373291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.08717866738637288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.0853706697622935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.053130666414896645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.053157334526379905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.05274133384227753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.0518506666024526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.050997331738471985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.050442665815353394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.05269866685072581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.05218133330345154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.052522664268811546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.05272000034650167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.033999999364217125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.033904001116752625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.034874667723973594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.03503466645876566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.03270400067170461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.026586666703224182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.11170666416486104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.11301866173744202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.10557867089907329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.11162133018175761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.1049013336499532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.11274666587511699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.11199466387430827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.11341333389282227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.11215999722480774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.10565867026646932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.11372266213099162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.11344533165295918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.11358933647473653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.10923199852307637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.11269866426785786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.10807466506958008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.06695466736952464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.06559466818968455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.06612266600131989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.06422933439413707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.06459199885527293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.06164266665776571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.064560001095136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.06529599924882253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.06483200192451477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.0614879975716273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.042090664307276406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.04274133344491323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.0421066681543986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.04339733223120371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.04231466849644979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.04011200120051702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.04088533421357473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.04208533465862274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.04119466741879781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.04111466556787491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.029050665597120922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.02828266719977061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.09077866872151692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.09076266487439473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.09116799632708232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.08518399794896443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.09110400080680847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.09093333284060161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.09096533060073853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.09139200051625569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.0860640009244283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.09150933225949605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.09100266297658284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.08462933699289958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.09131733576456706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.05500799914201101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.05473599831263224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.055498664577802025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.05453333258628845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.050901333491007485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.05527999997138977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.05226133267084757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.054234668612480164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.054789334535598755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.0543146679798762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.05156800150871277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.05141866703828176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.03714133302370707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.03584533433119456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.0359199990828832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.08019199967384338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.08030400176843007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.08017066617806752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.08077866832415263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.0765066643555959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.08078399797280629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.08082133531570435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.08078399797280629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.07673066854476929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.0809333324432373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.08148266871770223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.08072000245253245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.07601066430409749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.048394665122032166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.050373335679372154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.048309331138928734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.048245335618654885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.04808000226815542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.04993066688378652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.04849599798520406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.022831998765468597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.45763734976450604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,2.7418346405029297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.46144533157348633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.4310826857884725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,2.745701471964518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,2.5463786125183105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.4711039861043294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,2.7525014877319336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.4798613389333089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.12760000427563986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.4460800091425578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,2.7563254038492837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,2.5507307052612305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.4842880169550578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.49103466669718426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.46433067321777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,2.766127904256185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,2.7774985631306968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,2.5718560218811035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.2773866653442383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.2839679916699727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.26897599299748737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,1.471461296081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,1.478256066640218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,1.4004213015238445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.24540799856185913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.24872533480326334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,1.4357760747273762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.23428799708684286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,1.3384586970011394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,1.4406240781148274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.25071465969085693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.25464000304539997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,1.4398880004882812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.24251733223597208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,1.3442880312601726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,1.4452533721923828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.26065067450205487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.2638346751530965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,1.4528907140096028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.2507573366165161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,1.4559574127197266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,1.353973388671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.15839999914169312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.16518400112787882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,0.8104373613993326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.15635733803113303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,0.8131146430969238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,0.7551733652750651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.1399946709473928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.14005866646766663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,0.7880373001098633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.13429866234461466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,0.7881653308868408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,0.735210657119751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.14231466253598532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.1442293326059977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,0.7905706564585367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.14160533746083578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,0.7917226950327555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,0.7370453675587972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.1483466625213623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,0.7963360150655111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.14607999722162882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,0.8172586758931478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,0.7469706535339355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.11424000064531963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.4894506533940633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.11451733112335205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.10950400431950887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.4900906483332316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.456767996152242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.11343466242154439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.11343999703725179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.488042672475179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.488597313563029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.11357866724332173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.45402665932973224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.11343999703725179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.48870933055877686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.10817600289980571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.48840534687042236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.45393598079681396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.11357333262761433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.4878506660461426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.1132426659266154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.10902399818102519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.4880959987640381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.4549226760864258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.3469173510869344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.350602666536967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.32808534304300946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,1.633562723795573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,1.6431946754455566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,1.5212213198343914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.3558719952901204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.36001066366831463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,1.646085262298584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.3396799961725871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,1.5317333539326985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,1.6507892608642578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.36841599146525067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.3718986511230469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,1.6592159271240234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.35205864906311035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,1.6650560696919758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,1.5461440086364746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.21192532777786255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.22285866737365723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,0.8988213539123535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.20774932702382407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,0.905839999516805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,0.8417440255482992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.18788800636927286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,0.8714346885681152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.18973867098490396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.18147200345993042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,0.8733386993408203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.19105599323908487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,0.813530683517456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.1944426695505778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,0.8748213450113932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.1858773430188497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,0.8765066464742025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,0.8181706269582113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.19894399245580038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.2016800045967102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,0.8847253322601318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.19582400719324747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,0.8865280151367188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,0.8269920349121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.1255466639995575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.5059893528620402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.12610133488972983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.12388267119725545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.5190773407618204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.4742879867553711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.11334400375684102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.4899413188298543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.49114668369293213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.4553333520889282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.11147200067838033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.11360533038775127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.4911359945933024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.1092800001303355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.49240533510843915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.4617120027542114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.11595732967058818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.1181119978427887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.4968159993489583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.11385066310564677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.49861331780751544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.4640266497929891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.0885599950949351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.08902933200200398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.0863146682580312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.31717334191004437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.2960853377978007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.08785067001978557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.31631465752919513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.08898133039474487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.3174186746279399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.2978026668230693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.08815466364224751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.3172053297360738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.08867200215657552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.08479467034339905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.3176640073458354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.2977013389269511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.08725866675376892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.31641600529352826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.08886399865150452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.08515733480453491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.3179466724395752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.2962453365325928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.28754132986068726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,1.1911253134409587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.28999465703964233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.27215999364852905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,1.1949386596679688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,1.108575979868571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.29526400566101074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,1.1958826382954915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.2964800000190735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.28428266445795697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,1.1992533206939697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,1.1248693466186523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.30424533287684125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,1.2108373641967773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.3087573250134786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.29178667068481445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,1.215557336807251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,1.1273706754048665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.17865600188573202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,0.662826657295227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.1840426723162333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.17612799008687338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,0.667738676071167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.6223839918772379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.15657066305478415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,0.640607992808024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.15864533185958862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.15223466356595358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,0.6419413487116495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.5968213478724161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.160671999057134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,0.6447466611862183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.1630453368028005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.15689067045847574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,0.6464906533559164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.6031999985376993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.16710400581359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,0.6507893403371176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.17115734020868936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.16291200121243796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,0.654416004816691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.6112800041834513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.10453866918881734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.37805867195129395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.10753066341082256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.38149333000183105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.3575093348821004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.09491200248400371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.36753066380818683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.0960693359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.09080533186594646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.36961066722869873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.3428479830423991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.0972160001595815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.36954665184020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.09118933478991191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.3715946674346924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.3448053201039632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.09913067022959392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.37229867776234943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.10121599833170573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.09717333316802979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.37560534477233887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.3493013381958008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.07905066510041554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.2487786610921224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.07828266421953838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.07455466687679291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.24778666098912558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.23228800296783447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.24891199668248495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.07851733267307281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.07455466687679291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.24866666396458945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.23233066002527872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.07878933350245158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.24931732813517252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.07841599980990092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.24846933285395303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.23228800296783447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.07835733393828075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.24875734249750772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.0761653333902359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.24850666522979736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.2323626677195231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.447002649307251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,1.5565706888834636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.45047998428344727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.41946665445963544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,1.5598506927490234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,1.4438986778259277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.4607199827829997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,1.5712587038675945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.4642560084660848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.4352853298187256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,1.5741066932678223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,1.4604479471842449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.4795626799265544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,1.5870240529378254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.4859946568806966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.4553333520889282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,1.5900853474934895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,1.4770240783691406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.26498132944107056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,0.8507680098215739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.27128533522288006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.25893332560857135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,0.854586680730184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,0.7933066685994467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.23333332935969034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,0.8200266361236572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.2367253303527832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.22428266207377115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,0.8167946338653564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,0.7594666481018066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.2391093373298645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,0.8203893502553304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.24289067586263022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.23056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,0.8210773468017578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,0.7651573022206625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.24753600358963013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,0.833301305770874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.25390400489171344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.24053333202997842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,0.8330399990081787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,0.7763626575469971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.1444906691710154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.46378668149312335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.14844266573588052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.14262400070826212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.4679786761601766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.4368906815846761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.1242026686668396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.4418773253758748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.12774933377901712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.12156800429026286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.4448373317718506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.4145600001017253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.12760000427563986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.4446400006612142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.1292586624622345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.1253919998804728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.44758399327596027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.41892798741658527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.13397333025932312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.45187731583913165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.13591999808947244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.1323306659857432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.45389334360758465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.42685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.08477333188056946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.26899733146031696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.08702400326728821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.08627733588218689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.2720800042152405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.2569653391838074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.0784853349129359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.2630239923795064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.08101333181063335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.2637493411699931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.24612800280253092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.07896533111731212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.2640373309453328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.08083733419577281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.07684266567230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.2641119956970215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.24580266078313193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.26531734069188434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.08293866614500682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.07893333335717519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.26703999439875287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.24868800242741904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.06285333136717479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.1832533280054728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.06365333497524261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.18342934052149454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.1728853384653727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.0626453310251236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.18329066038131714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.06437333424886067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.1831573247909546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.17311465740203857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.18321067094802856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.06371733546257019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.17259732882181802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.18494399388631186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.06325866778691609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.18519999583562216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.17287999391555786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.33897598584493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,0.9576480388641357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.3430826663970947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.31842132409413654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,0.9615360101064047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,0.8912106355031332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.3510506550470988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,0.9695519606272379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.352730671564738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.3306933244069417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,0.971290667851766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,0.9017600218454996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.36161601543426514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,0.981013298034668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.36373333136240643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.3441813389460246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,0.9862240155537924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,0.9140586853027344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.2065920035044352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.5397440195083618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.20907199382781982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.1997013290723165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.5404586791992188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.502618670463562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.17818133036295572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.5046240091323853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.18000533183415732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.17165333032608032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.5081173181533813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.4745546579360962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.1832053263982137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.5107733408610026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.1853440006573995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.1771626671155294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.5146133502324423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.47973334789276123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.19139200448989868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.5194026629130045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.1943946679433187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.18519467115402222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.5239946842193604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.48872534434000653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.11437867085138957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.2977546652158101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.11668800314267476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.11355200409889221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.30109866460164386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.2834399938583374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.10055999954541524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.28270934025446576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.09643200039863586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.28462932507197064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.2651413281758626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.10466133554776509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.28562132517496747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.10416000088055928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.09884799520174663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.285861333211263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.2669866681098938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.10756267110506694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.2898079951604207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.10359999537467957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.2919626633326213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.273306667804718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.06636266907056172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.1787733236948649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.179530660311381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.16902933518091837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.0625600020090739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.17510932683944702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.06381333371003468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.17485866943995157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.16276266177495322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.1753973364830017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.17495467265446982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.1630826691786448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.06444799900054932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.17677332957585654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.06523199876149495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.17731199661890665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.16551466782887778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.12577066818873087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.11777599652608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.12402666608492534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.117658664782842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.12588799993197122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.12453333536783855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.11595732967058818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.12595199545224509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.052202666799227394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.12594667077064514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.11776533722877502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.4469013214111328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,0.9601759910583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.4491893450419108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.41673068205515545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,0.9634933471679688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,0.8919306596120199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.4610613187154134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,0.9749226570129395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.46406400203704834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.43354666233062744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,0.9767200152079264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,0.9066932996114095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.4764159917831421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,0.9927786986033121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.4805920124053955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.4503733317057292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,0.9947146574656168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,0.9246666431427002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.26261333624521893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.533946673075358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.2672853271166484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.2547253370285034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.5375680128733317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.5023306608200073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.22913066546122232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.4978293180465698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.23181867599487305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.21967466672261557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.5007839997609457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.4660053253173828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.23460266987482706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.504741350809733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.23823465903600058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.2258453369140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.5063413381576538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.47394665082295734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.24459733565648398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.5136160055796305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.2486720085144043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.23427200317382812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.5191253423690796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.48391465346018475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.13912000258763632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.2880213260650635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.143696000178655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.13910399874051413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.29203200340270996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.27498666445414227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.12026133139928182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.2692213257153829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.12349866827329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.1169599990049998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.2720053394635518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.25278933842976886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.12341866890589397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.2716533342997233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.12589866916338602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.11983467141787212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.2755146622657776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.2587466637293498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.12974933783213297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.2784319917360942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.1325226624806722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.12793599565823874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.28145599365234375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.2651093403498332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.07864533364772797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.16479466358820596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.08057599763075511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.1704746683438619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.1607306698958079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.0720000018676122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.15843733151753744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.06836266815662384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.1588053305943807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.1483840048313141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.07246399919191997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.1585813363393148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.07443733513355255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.16090133786201477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.14969066778818765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.1606666644414266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.07619200150171916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.1625546713670095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.15238933761914572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.10750933488210042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.048063998421033226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.10970667004585266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.10181867082913716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.1053653359413147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.04663466910521189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.04470400015513102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.09849599997202556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.0469813346862793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.10512533783912659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.1053706705570221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.09898133079210918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.10520000259081523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.04955733319123586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.09993599851926167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.0769706666469574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.07749333480993907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.03909866760174433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.07717333237330119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.03806400050719579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.07710400223731995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.0726560006539027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.07669866581757863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.072202667593956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.34518933296203613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.6215306520462036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.34541332721710205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.322432001431783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.6235040028889974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.5793226559956869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.35658665498097736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.6330399910608927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.3569493293762207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.3344533443450928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.6338826815287272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.5899093151092529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.36840001742045086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.6465493440628052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.37017067273457843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.34842665990193683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.6467039982477824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.6035306851069132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.20592000087102255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.3531893491744995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.2074399987856547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.35574932893117267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.3338613510131836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.17565866311391196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.3235146601994832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.17729600270589194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.17086933056513467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.32417066891988117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.30477867523829144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.181167999903361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.32676267623901367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.18313600619633993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.17665600776672363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.3285173376401265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.3110453287760417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.19115199645360312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.3381066719690959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.19333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.18367467323939005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.3392639954884847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.3188106616338094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.10979732871055603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.1917919913927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.11314666271209717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.1113759974638621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.19547200202941895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.1872426668802897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.09507733583450317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.1777120033899943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.08984532952308655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.17917867501576742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.16635200381278992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.0958666702111562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.17865600188573202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.09922666351000468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.09284266829490662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.1811573306719462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.1693120002746582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.10108799735705058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.1829493244489034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.10332799951235454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.0992693305015564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.1869866649309794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.1757226586341858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.06187733511130015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.11190932989120483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.06201066573460897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.11547733346621196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.10964799920717876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.05702400207519531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.10759466886520386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.05449066559473673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.10993066430091858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.1019040048122406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.05863999823729197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.10987733801205952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.05955199897289276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.05644799768924713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.11124799648920695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.10309867064158122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.10949333508809407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.07439466814200084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.07643733421961467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.0734506646792094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.07230933507283528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.07418133318424225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.07018133501211803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.04242133100827535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.07441600163777669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.0747519979874293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.07025599976380666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.061946665247281395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.05835733314355215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.47051199277242023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,0.6838080088297526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.4676693280537923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.42929601669311523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.6821066538492838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.6280479828516642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.4852373202641805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,0.6990079879760742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.48526934782663983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.4409866730372111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,0.7004853089650472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.6403359969456991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.4987200101216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,0.7114400068918864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.4960639874140422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.4561973412831624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,0.7099573612213135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.6551466782887777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.2700000007947286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.3850933313369751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.2722986737887065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.25915733973185223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.3863573471705119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.36345068613688153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.2334346572558085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.34437867005666095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.2338506579399109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.22109333674112955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.3450506528218587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.3248213330904643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.24076799551645914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.35184534390767414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.24226667483647665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.22984532515207926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.35233068466186523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.3332746624946594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.2498613397280375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.363264004389445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.25355732440948486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.23913600047429404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.36535465717315674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.34272531668345135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.14189866185188293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.20509866873423258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.14442666371663412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.1400053302447001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.2071466644605001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.1966773271560669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.11874133348464966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.18121065696080527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.12179733316103618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.11569600303967793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.18524799744288126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.17405333121617636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.12171733379364014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.18467199802398682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.1255626678466797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.11968533198038737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.1864746610323588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.17706666390101114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.1281599998474121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.19087467590967813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.1311306655406952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.12780800461769104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.19370667139689127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.18554667631785074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.1113866666952769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.0787360022465388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.11550399661064148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.11303466558456421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.07031466563542683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.10520533720652263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.0995093286037445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.10548800230026245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.07246399919191997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.06820266445477803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.10083733002344768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.07183466851711273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.10706133643786113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.07480533421039581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.07189866900444031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.10983999570210774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.10471999645233154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.0724533349275589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.06860266625881195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.0705386648575465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.04359999795754751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.07011733452479045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.07036800185839336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.064751997590065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.07238933444023132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.06796800096829732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.04819199939568838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.04674666623274485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.02978666623433431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.04610666632652283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.04407466451327006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.02828266719977061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.043434664607048035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.043466667334238686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.34641067186991376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.45026131470998126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.3457173506418864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.3221386671066284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.4522826671600342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.4230186541875203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.35818668206532794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.46376001834869385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.3603146473566691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.33631467819213867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.4673706690470378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.4355253378550212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.37094934781392414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.47806398073832196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.37251734733581543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.34829334417978924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.47991466522216797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.447434663772583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.2055786649386088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.2643893361091614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.20785599946975708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.19850132862726846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.2677813371022542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.2534133394559224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.17327467600504556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.2299786607424418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.17505067586898804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.16709866126378378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.2330346703529358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.22219200929005942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.18026133378346762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.237226665019989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.18197333812713623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.17437867323557535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.23834667603174844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.22734399636586508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.1900426745414734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.24728532632191977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.19171732664108276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.1835199991861979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.25005332628885907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.23783999681472778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.10961600144704182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.1404213309288025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.11017599701881409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.1088746686776479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.1442346672217051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.1402133305867513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.09310400485992432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.12521599729855856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.12736533085505167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.11973866820335388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.0951039989789327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.12667199969291687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.09889066219329834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.09355200330416362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.12948800126711527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.12298666437466939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.09876267115275066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.13008532921473184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.10095999638239543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.09847999612490337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.13427199920018515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.1300320029258728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.060880000392595925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.08179733157157898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.08287466565767924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.0547626664241155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.07923200229803722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.0749120016892751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.05825600028038025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.0792799989382426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.05852266649405161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.05450133482615153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.07912000020345052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.07503466804822286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.058415999015172325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.08002133170763652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.05650666852792104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.08115200201670329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.054229333996772766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.054714664816856384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.053120002150535583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.048250665267308555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.05231466889381409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.05027199784914652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.035887998839219414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.4017866849899292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.4680746793746948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.3965119918187459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.3718506495157878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.46678932507832843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.4368106524149577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.4049866596857707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.47162667910257977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.4043039878209432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.3797279993693034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.47176531950632733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.44368000825246173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.41469868024190265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.4819999933242798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.4129653374354045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.40643731753031415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.48423465092976886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.4721440076828003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.21704532702763876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.2545173366864522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.2161173423131307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.22023467222849527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.2521599928538005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.25307732820510864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.2097919980684916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.24438399076461792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.20832000176111856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.19655466079711914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.24481600522994995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.23025067647298178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.2135093410809835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.2477333347002665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.2116053303082784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.2000746726989746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.24765332539876303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.2347253362337748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.21787200371424356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.25334399938583374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.218122661113739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.2087413271268209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.25542400280634564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.24180267254511514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.11988799770673116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.1418186624844869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.1183626651763916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.14089600245157877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.11370666821797688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.134442667166392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.11358933647473653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.1344000001748403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.12779733538627625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.1360266705354055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.1136853297551473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.1095199982325236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.13641599814097086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.1299199958642324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.11974933743476868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.14069333672523499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.11829333504041036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.11558933059374492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.14031466841697693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.1327786644299825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.06676800052324931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.07981866598129272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.06654400130112965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.08052266637484233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.06563733518123627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.07713066538174947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.06092800199985504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.07262399792671204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.0662666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.07846400141716003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.0661653329928716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.06237866481145223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.0727946658929189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.0673173318306605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.07913066446781158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.06482666730880737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.07869866490364075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.04293866455554962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.05219733218352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.05028266708056132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.04773333172003428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.042410666743914284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.048112000028292336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.041984001795450844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.04340800146261851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.04286933441956838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.05099200208981832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.028143999477227528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.03201599915822347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.02696000039577484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.3904106616973877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.3951093355814616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.4167040189107259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.3633013168970744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.3900800148646037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.3686453501383464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.3987199862798055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.4026613235473633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.39613866806030273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.3670986493428548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.39903998374938965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.3735893170038859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.4049813350041707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.41067198912302655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.40478400389353436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.3873973290125529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.40914666652679443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.3965173165003459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.21209599574406943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.2162239948908488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.20958934227625528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.21402132511138916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.21251734097798666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.2172693411509196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.20362132787704468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.20620266596476236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.20175999402999878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.19180800517400107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.2036479910214742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.19324266910552979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.20837867259979248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.20777066548665366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.20600533485412598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.19362133741378784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.2079253395398458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.1986666719118754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.21199466784795126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.2141653299331665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.2113920052846273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.2029013236363729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.21405333280563354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.1174720029036204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.2062079906463623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.11981333295504253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.11841066678365071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.11934933066368103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.11806399623552959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.11963199575742085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.1109386682510376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.11123733719189961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.10945066809654236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.10455466310183208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.1071519951025645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.11332266529401143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.11575466394424438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.11322666207949321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.1143839955329895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.11018666625022888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.11739200353622437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.11877333124478658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.11613333225250244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.1176479955514272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.11450133721033733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.06630933284759521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.0664106657107671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.06577600042025249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.06645333270231883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.06666666766007741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.06373866895834605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.060826669136683144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.06411733229955037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.06478400031725566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.06463466584682465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.06394133468468984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.042538667718569435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.04404266675313314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.042026668787002563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.029066666960716248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.0879200001557668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.02219199885924657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.19125332434972128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.1860319972038269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.1895893414815267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.1785866618156433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.18663465976715088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.17564266920089722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.19241066773732504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.18695465723673502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.1909173329671224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.1797599991162618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.18844266732533774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.17678932348887125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.19713066021601358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.1945120096206665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.19435199101765951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.18980266650517783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.19128000736236572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.1837493379910787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.10749866565068562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.10656000177065532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.10723732908566792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.10485333204269409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.10497599840164185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.10123200217882793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.09904000163078308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.10074667135874431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.09924800197283427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.10194666186968486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.10309867064158122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.1027786632378896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.09699733058611552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.10026133060455322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.09522133072217305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.10745066404342651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.1051680048306783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.10754666725794475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.10280000170071919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.10523200035095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.10169600447018941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.06135466694831848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.058415999015172325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.056517332792282104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.055344000458717346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.06055466830730438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.05798399945100149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.06030400097370148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.0621066689491272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.03796799977620443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.037130666275819145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.03811733424663544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.10769599676132202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.10876267155011494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.10380267103513081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.10652266939481099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.10533333818117778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.10314133763313293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.108624001344045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.11005866527557373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.10892800490061443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.10339732964833577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.1074026624361674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.10266666611035664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.11348799864451091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.11370133360226949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.11161599556605022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.11006399989128113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.11111467083295186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.11131733655929565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.062394668658574425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.06101333101590475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.06518933176994324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.06206400195757548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.06075199941794077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.06046399970849355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.05810666580994924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.05874133110046387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.06038933495680491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.059690664211908974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.05927466849486033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.06075199941794077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.060602664947509766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.040037333965301514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.040735999743143715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.025829332570234936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.01964266722400983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.07913066446781158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.07928533355395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.08021333316961925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.08027199904123943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.07447466750939687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.07414400080839793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.0805866668621699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.08066133161385854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.07879466811815898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.0481333335240682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.04735999802748362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.04491200049718221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.046485334634780884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.06642666459083557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.06689066688219707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.06676800052324931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.068271999557813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.06679999828338623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.06225599845250448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.06836266815662384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.06807466844717662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.0664213349421819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.0643146683772405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.03990400085846583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.04008533308903376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.03722666700681051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.040175999204317726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.0401706670721372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.016501333564519882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.05835199852784475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.05835199852784475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.05811200042565664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.056202664971351624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.01964266722400983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.019845332950353622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.24277865886688232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,1.4321173032124836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.24447466929753622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.23029865821202597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,1.4327093760172527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,1.328986644744873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.2526666720708211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,1.4445759455362956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.25697600841522217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.2435413400332133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,1.447322686513265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,1.3417493502298992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.14443199833234152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,0.7952426274617513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.14921599626541138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.1430186629295349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,0.7979199886322021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,0.7422133286794027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.12773866454760233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,0.7765279610951742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.13179199894269308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.1256160040696462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,0.779210646947225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,0.7232906818389893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.13396799564361572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,0.7817119757334391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.1355946660041809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.1337493360042572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,0.785045305887858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,0.731824000676473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.08608532945315044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.46141334374745685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.08866666754086812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.08764800429344177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.46475199858347577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.4326133330663045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.08032000064849854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.4537973403930664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.08042666812737782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.07735466460386912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.45579198996225995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.4225813150405884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.08271466692288716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.45609601338704425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.08055999875068665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.45764267444610596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.42446398735046387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.3058346708615621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.06414933502674103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.306442658106486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.2855306665102641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.30611199140548706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.06401066482067108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.3059840003649394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.2852533260981242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.3059786756833394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.306005338827769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.28495999177296955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.18440000216166177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,0.8713493347167969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.18709333737691244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.31710400183995563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.1765706737836202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,0.8720266819000244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,0.8077920277913412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.19328000148137411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.1958613395690918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,0.8771680196126302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.1872746745745341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,0.8787573178609213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,0.8326133092244467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.11482133467992146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.11783466736475627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.492789347966512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.1132426659266154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.49726935227711994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.4657599925994873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.10325866937637329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.10537599523862202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.4816480080286662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.09941866993904114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.4844906727472941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.44970667362213135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.10521599650382996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.10745066404342651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.4848639965057373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.10347200433413188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.48727468649546307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.45486398537953693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.06856533388296764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.2964373429616292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.07017066578070323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.06646933158238728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.29781333605448407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.27873067061106366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.2919146617253621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.2918613354365031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.2768213351567586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.0639573335647583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.06613866488138835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.2959413329760234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.062090665102005005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.29474665721257526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.2732479969660441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.2015413244565328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.20038400093714395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.18645334243774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.05454933146635691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.20094933112462363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.20107734203338623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.054645334680875145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.2012373407681783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.20099733273188272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.18713066975275675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.15397866566975912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,0.6357653141021729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.1560426652431488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.14918399850527445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,0.6384479999542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.5947999954223633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.16049066185951233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,0.6422293186187744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.16396266222000122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.15848533312479654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,0.6459786494572958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.6031359831492106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.09569066762924194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.36904001235961914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.09894399841626485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.09718400239944458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.373306671778361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.3494826555252075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.08853866656621297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.3613813320795695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.08418666323026021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.36266668637593585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.33661333719889325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.0890880028406779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.36138665676116943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.09108799695968628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.08799466490745544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.3652533292770386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.3402880032857259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.23026132583618164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.062037333846092224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.2322346568107605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.21648534138997397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.05876266459623972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.22865599393844604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.2283253272374471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.21202667554219565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.2285919984181722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.0603413333495458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.22872533400853476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.21357333660125732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.151418666044871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.15018133322397867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.14076266686121622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.15221866965293884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.04773333172003428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.1507306694984436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.14009599884351095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.15226667126019797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.15074132879575095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.140255997578303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.23546133438746134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,0.8141653537750244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.2386186718940735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.2253333330154419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,0.817530632019043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,0.759178638458252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.24790932734807333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,0.8282773494720459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.2512800097465515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.23848533630371094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,0.8302880128224691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,0.7721439997355143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.13923733433087668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.45761064688364667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.14231466253598532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.138373335202535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.46186665693918866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.43138666947682697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.12164800365765889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.4392053286234538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.12418133020401001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.11970667044321696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.4413119951883952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.41178667545318604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.12690666317939758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.4438933531443278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.1309706668059031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.12756799658139548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.44722668329874676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.42045867443084717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.2614240050315857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.0802293320496877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.26554133494695026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.2503040035565694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.2558506727218628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.07050133248170216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.25763734181722003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.2400426665941874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.07420800129572551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.25884799162546795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.07281599938869476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.2609440088272095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.2442506750424703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.17137600978215536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.17141334215799967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.16031466921170553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.16658133268356323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.16880534092585245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.15665599703788757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.16863999764124551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.17003732919692993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.15853333473205566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.11768000324567159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.038202665746212006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.11823999881744385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.11167466640472412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.03825599948565165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.11767466862996419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.1190720001856486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.1176479955514272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.11937600374221802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.11166399717330933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.18247999747594199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.5095839897791544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.18440532684326172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.17641067504882812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.5115893284479777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.47654398282368976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.19130132595698038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.5193599859873453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.19371734062830606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.18547199169794717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.5215040047963461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.4883466561635335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.11124799648920695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.2953439950942993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.1132319966952006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.10973866780598958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.2972106734911601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.2794613242149353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.097871998945872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.27926933765411377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.09923199812571208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.09316266576449077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.2813066641489665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.2629973292350769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.10187199711799622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.2845226724942525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.10318400462468465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.09994133313496907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.2853013277053833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.26708267132441205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.1734773317972819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.06393066545327504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.17553067207336426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.1646773318449656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.16914665699005127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.16877333323160806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.15773333112398782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.170522669951121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.05805333455403646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.17202132940292358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.1585919956366221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.11556800206502278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.1153600017229716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.10755733648935954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.11352533102035522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.11261866490046184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.10500799616177876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.11366933584213257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.10562666257222493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.09297600388526917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.09285333752632141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.0867199997107188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.09249066313107808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.09292800227801006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.08684800068537395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.09272000193595886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.09274666508038838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.08693866928418477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.24116800228754678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.5113439957300822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.24341332912445068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.23089067141215006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.5130506753921509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.477946678797404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.25519466400146484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.524895985921224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.2558346589406331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.2417866587638855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.5260693232218424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.4909120003382365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.14011733730634054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.290175994237264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.14270400007565817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.139957328637441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.29346134265263873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.2751839955647786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.12150933345158894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.2699306607246399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.12363732854525249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.11843732992808025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.2711893320083618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.2547733386357625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.12719999750455221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.2752426664034526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.1276746690273285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.2785973350207011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.262938658396403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.16269866625467935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.1643946667512258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.15635200341542563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.07007466753323872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.15625600020090738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.07243200143178304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.06649066507816315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.15826666355133057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.14619732896486917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.07047999898592631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.15798933307329813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.15965333580970764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.14812800288200378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.045498669147491455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.10321600238482158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.10540266831715901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.09884799520174663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.10102400183677673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.10202667117118835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.10388799508412679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.0969546635945638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.07049066821734111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.06632000207901001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.06846400101979573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.0684746652841568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.07030400137106578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.06570133566856384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.06818133095900218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.062496001521746315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.028736000259717304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.06612800061702728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.06628799935181935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.06348266700903575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.18637333313624063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.33242666721343994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.18578133980433145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.1771413286526998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.33321599165598553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.3130026658376058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.19285867611567178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.3394453525543213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.1946986714998881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.1872746745745341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.3418933153152466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.32337599992752075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.19105066855748495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.11078400413195293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.109525332848231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.19531200329462686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.1856000026067098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.0974026620388031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.17890665928522745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.18071999152501425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.16875199476877847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.09914666414260864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.18132267395655313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.10100266337394714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.09732266267140706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.1853653391202291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.17542399962743124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.06133866806825002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.11348799864451091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.10713066657384236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.05387733379999796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.1083573301633199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.10107733805974324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.10813867052396138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.11104533076286316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.10302933057149251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.03990933299064636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.0724373310804367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.0743999977906545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.07047466437021892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.07229866584142049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.07237333556016286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.056346664826075234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.0558186670144399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.052154665191968284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.051269332567850746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.05426133175690969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.026288000245889027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.054618666569391884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.2459999918937683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.35844266414642334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.24408000707626343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.23046932617823282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.3556533257166545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.3349546591440837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.2576853235562642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.3689279953638713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.2563146750132243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.24341867367426553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.370250662167867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.3448053201039632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.1418186624844869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.20488532384236655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.14350400368372598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.14090133706728616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.2060799996058146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.19814399878184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.12242133418718974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.18815465768178305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.12568533420562744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.12229866782824199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.1867039998372396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.1795253356297811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.12820266683896384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.19130667050679526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.13198399543762207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.12941333651542664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.19386667013168335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.18736533323923746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.07647466659545898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.07948799928029378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.11472533146540324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.11122666796048482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.07014399766921997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.10521066188812256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.07243200143178304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.06842133402824402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.10081066687901814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.07218133409818013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.1071626643339793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.07028266787528992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.1113920013109843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.10317333539326985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.04424533247947693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.07017066578070323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.07216533521811168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.06853333115577698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.0683786670366923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.06963733335336049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.07010133564472198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.07044266661008199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.04731733103593191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.04273599882920583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.02784000088771184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.02607999990383784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.044405331214269005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.04012266546487808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.20552533864974976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.262389341990153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.20286399126052856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.1962613264719645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.2613439957300822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.24981866280237833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.21409600973129272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.27082665761311847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.21234132846196493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.20197866360346475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.2701386610666911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.2567039926846822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.11998933553695679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.15239466230074564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.11892267068227132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.1197653313477834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.15280000368754068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.1495306690533956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.10329066713651021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.13383466998736063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.10433066884676616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.09728533029556274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.13529599706331888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.12571733196576437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.10730133454004924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.13769599795341492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.1076586643854777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.13958932956059775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.13479466239611307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.06485866506894429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.08493333061536153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.06531733274459839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.06260799864927928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.08477866649627686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.08103999992211659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.05982399980227152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.08078399797280629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.08082133531570435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.06098133325576782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.08228266735871632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.059631998340288796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.07634133100509644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.04165333261092504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.05221866567929586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.048901334404945374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.04092800120512644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.05330666899681091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.042912001411120095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.05527466535568237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.051632001996040344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.03855466594298681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.027903998891512554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.030085332691669464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.02409599969784419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.03584533433119456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.20910932620366415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.24653865893681845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.2095359961191813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.20029866695404053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.2446826696395874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.2328746716181437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.21258666117986044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.24893333514531454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.20966933170954385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.20857600371042886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.24847465753555298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.242959996064504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.12089600165685017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.14225066701571146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.11812266707420349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.11980266372362773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.141893337170283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.14041067163149515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.11157332857449849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.13275200128555298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.11024000247319539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.10544533530871074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.1336426635583242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.1267733375231425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.114464004834493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.13578133781750998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.11303466558456421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.11348266402880351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.1338986655076345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.13241599996884665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.06862399975458781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.08251200119654338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.06635733445485432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.07999466856320699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.07798933486143748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.07663466533025105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.06305600206057231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.0753119985262553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.06558933357397716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.07737066845099132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.04330133398373922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.05153599878152212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.042277331153551735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.050255998969078064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.04853333532810211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.0421066681543986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.042965332667032875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.04896533489227295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.02903466671705246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.03376533339420954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.02903466671705246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.026933332284291584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.2060426672299703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.20799465974171957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.20334933201471964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.1930720011393229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.20675732692082724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.19670399030049643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.20617600282033285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.20791999499003092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.20353599389394125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.20061866442362467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.20791999499003092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.20350933074951172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.11627200245857239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.11905066172281902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.11644267042477925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.11661866307258606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.11736533045768738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.11749866604804993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.10932266712188721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.11176533500353496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.10374400019645691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.10967466235160828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.10485333204269409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.11389333009719849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.11372799674669902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.11142933368682861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.11193600296974182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.11134933431943257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.06582933167616527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.06443200012048085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.06598933537801106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.06229866544405619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.060165335734685264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.06351999938488007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.06252266466617584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.061808000008265175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.029029332101345062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.022367998957633972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.020394666741291683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.10217600067456563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.10205333431561787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.09619200229644775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.09947199622790019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.09486400087674458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.103685329357783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.1030613382657369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.10326932867368062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.10326400399208069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.10070400436719258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.1011946698029836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.061237335205078125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.05950933198134104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.05884799857934316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.05793599784374237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.0554666668176651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.0592853327592214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.05799466868241628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.054901331663131714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.03811733424663544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.03812266637881597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.03788266579310099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.03641066700220108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.026762666801611584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.0259253333012263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.02038399999340375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.05643199880917867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.05985599756240845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.060565332571665444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.060959999759991966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.05925333499908447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.060362666845321655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.04548266530036926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.03298133363326391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.020799999435742695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.04088533421357473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.020015999674797058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.13210133711496988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,0.7835413614908854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.13210666179656982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.12797866264979044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,0.7844693660736084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,0.7264959812164307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.08082666496435802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.08494399984677632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.4553440014521281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.4580586751302083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.42958935101826984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.4555893341700236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.48314666748046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.41868265469868976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.051413332422574363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.2916640043258667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.05351466437180837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.29580267270406085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.2736533284187317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.2934880057970683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.2916746735572815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.2712106704711914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.2036906679471334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.20357867081960043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.18939199050267538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.2034346659978231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.203439990679423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.18863467375437418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.10171733299891154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.10329066713651021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.48236799240112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.10056533416112264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.5164213180541992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.4503680070241292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.06785066425800323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.29203200340270996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.06658666829268138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.06308799982070923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.2946666677792867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.27316800753275555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.31204267342885333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.2809813419977824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.19121599197387695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.19286400079727173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.1792800029118856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.04419733087221781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.19204266866048178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.1919040083885193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.17690666516621908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.15658666690190634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.14632532993952432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.15631999572118124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.03427733232577642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.15569067001342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.14516799648602804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.09127466877301534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.36422932147979736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.08534933129946391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.3814186652501424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.33914132912953693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.06042666733264923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.2302186687787374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.23170133431752524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.21531200408935547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.0565280020236969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.22702932357788086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.22829866409301758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.21037334203720093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.14538666605949402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.04277333120505015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.14468800028165182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.14436266819636026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.14239466190338135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.13380799690882364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.13212266564369202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.13192533453305563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.12371733784675598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.13329066832860312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.13179199894269308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.12386666735013326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.13168000181516012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.4482613404591878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.13201600313186646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.12796266873677573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.45103998978932697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.42176000277201336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.07891733447710673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.2637493411699931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.08055999875068665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.26683733860651654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.2502133250236511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.0739573339621226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.25843199094136554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.07461333274841309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.07088533540566762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.2613226572672526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.24221867322921753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.16900267203648886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.16939733425776163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.15849066774050394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.16711467504501343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.17955732345581055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.16686934232711792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.1135093371073405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.11335999766985576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.10549867153167725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.11150399843851726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.10462933778762817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.10955199599266052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.1093386709690094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.10947199662526448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.10950400431950887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.1032480001449585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.1035093367099762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.28703999519348145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.10227732857068379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.2882560094197591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.268069326877594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.17317867279052734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.06619733572006226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.06260266900062561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.1771199901898702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.1649279991785685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.17059199015299478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,128,1,fp8,fp8,0,0.056458666920661926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.17353065808614096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.16076800227165222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.04199466605981191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.11347732941309611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.11570666233698527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.10754666725794475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.11355732878049214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.08901333808898926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.0836853285630544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.028991999725500744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.08693866928418477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.08284799754619598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.08683199683825175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.0860746701558431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.08058133224646251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.08684266606966655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.08708799878756206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.1332480013370514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.28338666756947833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.13274666666984558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.1285973290602366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.29577066500981647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.27990400791168213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.08053333560625713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.16714133818944296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.08287466565767924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.08155199885368347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.16819200913111368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.15958399573961893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.15966399510701498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.0755680004755656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.07122666637102763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.17312000195185342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.15235199530919394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.04603200157483419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.107205331325531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.10454400380452473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.09778666496276855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.06896000107129414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.07053333520889282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.0684746652841568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.06815466781457265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.06211199859778086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.0650133341550827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.060592000683148704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.0625600020090739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.06249066690603892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.06187200049559275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.10989866654078166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.19343467553456625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.11173333724339803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.10817600289980571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.19242666165033975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.18345065911610922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.06418666740258534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.12105066577593486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.0689279983441035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.0644160012404124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.11615467071533203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.10962133606274922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.11297067006429036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.1132480005423228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.10529067118962605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.07429333527882893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.07657599945863088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.07041066884994507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.0729066679875056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.042821332812309265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.07429333527882893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.0563679983218511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.05261866748332977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.05207466582457224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.024906667570273083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.052229334910710655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.13339733084042868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.19542932510375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.13215999801953635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.12749333182970682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.1977226734161377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.18542399009068808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.07881600161393483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.11477333307266235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.08109333117802937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.07875200112660725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.11755733688672383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.11326400438944499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.07436266541481018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.11030933260917664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.07623999814192454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.07155733307202657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.11311466495196025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.10613866647084554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.04651733239491781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.07166933516661327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.04837333162625631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.07460266848405202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.044581333796183266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.0708426684141159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.07232533395290375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.06824000179767609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.048207998275756836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.029029332101345062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.10975466171900432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.14060266812642416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.11058132847150166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.14321600397427878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.1360053320725759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.086709330479304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.06648000081380208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.08803733189900716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.08434133728345235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.08145066599051158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.05845333139101664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.08504000306129456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.043434664607048035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.05594133337338766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.05431999762852987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.04201066493988037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.04029866556326548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.036015999813874565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.0317546675602595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.03313600023587545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.03207999964555105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.1125866671403249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.1341546674569448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.1116373340288798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.10727999607721965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.13264532883961996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.1267146666844686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.07923200229803722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.06632533172766368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.07831466694672902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.07672533392906189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.06244266529877981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.07639466722806294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.07278400162855785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.02202133337656657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.11301333705584209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.10559466481208801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.11176000038782756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.1072106659412384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.06534933547178905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.06378666559855144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.06485333542029063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.06248533229033152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.06357866525650024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.06241066753864288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.06039999922116598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.02460266649723053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.023669332265853882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.022304000953833263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.0200853335360686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.020367999871571858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.058448001742362976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.058517331878344216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.03719466676314672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.0360000009338061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.18739734093348184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.020394666741291683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.031871999303499855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.2909173369407654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.05004266897837321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.3152640064557393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.27085866530736286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.1837493379910787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.19122666120529175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.19364267587661743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.17908267180124918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.18995734055836996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.19113600254058838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.17766932646433511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.15229333440462747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.15249600013097128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.1421280006567637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.1483733355998993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.14854400356610617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.13825600345929465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.1434293289979299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.03925866633653641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.14409599701563516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.1339040001233419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.12988266348838806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.02884799987077713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.13009066383043924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.12156800429026286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.12758400042851767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.1260479986667633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.11762666702270508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.16931732495625815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.17077332735061646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.15918933351834616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.11261333028475444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.11213333408037822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.10552000006039937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.10755733648935954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.107424000898997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.10113599896430969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.10518399874369304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.09717866778373718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.11356799801190694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.11595199505488078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.10769066214561462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.08894399801890056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.08911466598510742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.08274133503437042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.08492799599965413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.08502933382987976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.07874666651089986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.08282666901747386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.08286400139331818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.046800002455711365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.10582933823267619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.1064906617005666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.09925867120424907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.0703306645154953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.06513600051403046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.026629333694775898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.06446399788061778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.04271466533342997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.07479466497898102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.04206933577855428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.072543998559316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.02898666759332021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.056186666091283165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.05190933247407278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.04935466746489207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.050613333781560264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.04828266799449921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.04818133513132731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.07574399809042613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.049322664737701416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.041296000281969704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.03722666700681051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.03812800099452337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.05610666672388712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.03398400048414866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.049685334165891014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.04165333261092504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.030991998811562855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.017850667238235474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,float16,0,44.22041320800781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,fp8,0,44.13040669759115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,32.59239959716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,float16,0,43.12210591634115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,fp8,0,43.57295227050781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,32.69486999511719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,float16,0,42.58221944173177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,fp8,0,43.8626963297526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,32.86077372233073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,23.192454020182293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,23.680262247721355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.2881386677424113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,22.636502583821613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,20.36944580078125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,16.639909108479817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,22.42730204264323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,22.75597890218099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,16.416180928548176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,22.521764119466145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,22.234644571940105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,16.795909881591797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,11.065483093261719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,11.7882448832194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,8.834885279337565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,11.483690897623697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,10.785029093424479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,8.420245488484701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,11.18276850382487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,11.368607838948568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,8.427802403767904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,11.224613189697266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,11.030928293863932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,8.633087793986002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,5.7844797770182295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,5.604901631673177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,4.665439923604329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,5.243770599365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,5.23089599609375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,4.5363359451293945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,5.182346661885579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,5.433786392211914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,4.545178731282552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,5.576480229695638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,5.424181620279948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,4.567434628804524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,24.837621053059895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,24.530619303385418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,19.088890075683594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,25.392283121744793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,25.965840657552082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,19.61187744140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,26.098678588867188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,27.442105611165363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,13.236763000488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,19.347877502441406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,10.270367940266928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,13.665674845377604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,13.716171264648438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,12.650469462076822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.01989866668979327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,9.731157302856445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,12.74280039469401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,9.787002563476562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,12.48422368367513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,12.9824587504069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,12.423994700113932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,9.761088053385416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,7.2801869710286455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,5.34611701965332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,6.745626449584961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,6.3922882080078125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,5.90072504679362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,5.114768028259277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,5.979984283447266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,5.106111844380696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,5.979493459065755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,6.481472015380859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,3.326330820719401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,5.118010520935059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,2.9005279541015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,3.3031040827433267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,3.1645758946736655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,5.7151947021484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,2.8007787068684897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,3.1133174896240234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,3.2907145818074546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,3.4110240936279297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,2.791786511739095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,3.1836961110432944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,3.3749974568684897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,2.7961174647013345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,14.045173645019531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,18.38324737548828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,18.233642578125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,17.66981379191081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,14.080373128255209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,20.443108876546223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,18.70314661661784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,9.939498901367188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,18.205856323242188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,14.036986033121744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,9.899909337361654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,7.60748291015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,8.727045059204102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,9.746623992919922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,7.109194437662761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,8.770010630289713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,9.114789326985678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,7.139285405476888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,8.952378590901693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,4.661445299784343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,9.510629018147787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,7.138608296712239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,4.385786692301433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,3.9432586034139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,4.279578526814778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,4.652565320332845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,3.738762537638346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,4.568496068318685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,3.7482401529947915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,4.379002571105957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,4.2835893630981445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,2.5471413930257163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,2.673760096232096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,4.512458801269531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,3.7549333572387695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,2.1641972859700522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,2.5513280232747397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,2.0873546600341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,2.5399680137634277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,2.3035146395365396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,2.074127991994222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,2.515397389729818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,2.4229812622070312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,2.076927979787191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,2.3085546493530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,18.951093037923176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,float16,0,25.765909830729168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,fp8,0,24.884262084960938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,float16,0,24.420127868652344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,19.04422378540039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,fp8,0,25.63452911376953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,float16,0,25.46215565999349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,fp8,0,25.83154551188151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,12.814778645833334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,19.03606414794922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,12.43838373819987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,12.190101623535156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,9.450170516967773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,11.81594721476237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,11.967263539632162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,12.271018981933594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,9.425301233927408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,9.448031743367514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,11.713461558024088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,12.844640096028646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,6.0519358317057295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,5.177040100097656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,5.594714482625325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,5.767978668212891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,4.871295928955078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,5.894629160563151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,5.840725580851237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,4.885557174682617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,5.58509890238444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,5.764485041300456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,3.2685759862264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,4.87552007039388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,3.09773858388265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,2.7463305791219077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,3.0758612950642905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,2.9427998860677085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,2.587007999420166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,2.9684267044067383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,3.015872001647949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,2.643130620320638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,3.0862773259480796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,2.9723361333211265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,1.7182399431864421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,2.630309263865153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,1.6945226987202961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,1.5265919367472331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,1.6521600087483723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,1.6296693483988445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,1.4562400182088215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,1.6624587376912434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,1.626911958058675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,1.7766879399617512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,1.6300746599833171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,1.6191840171813965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,1.514896074930827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,14.367791493733725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,13.9781494140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,11.461109161376953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,float16,0,14.573893229166666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,fp8,0,14.032048543294271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,11.5099728902181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,float16,0,14.418885548909506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,fp8,0,15.157525380452475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,11.537855784098307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,7.379632314046224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,7.49510383605957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,6.332938512166341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,6.718896230061849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,7.3336747487386065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,5.810549418131511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,7.065205256144206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,6.984752019246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,5.841829299926758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,7.016266504923503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,7.036863962809245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,3.92301336924235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,5.848208109537761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,3.2830721537272134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,4.013701438903809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,3.433701197306315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,3.432016054789225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,3.04200013478597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,3.5223251978556314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,3.462554613749186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,3.283562660217285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,3.6358826955159507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,3.4678773880004883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,1.9265653292338054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,3.0437758763631186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,1.9503199259440105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,1.7557759284973145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,1.9272480010986328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,1.8409813245137532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,1.6359465916951497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,1.841477394104004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,1.8344693183898926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,1.7036693890889485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,1.8446027437845867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,1.845344066619873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,1.0945706367492676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,1.6486825942993164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,1.0956693490346272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,0.9978346824645996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,1.036725362141927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,1.0361440181732178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,0.9415573279062907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,1.0403253237406414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,1.0418186982472737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,0.9442400137583414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,1.04204265276591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,1.0415626366933186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,0.9449333349863688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,float16,0,14.159493764241537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,fp8,0,14.615819295247396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,11.949456532796225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,float16,0,14.49719492594401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,fp8,0,14.15451176961263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,11.926959991455078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,float16,0,14.555119832356771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,fp8,0,14.273316701253256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,7.622255961100261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,11.984048207600912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,8.121018727620443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,96,128,0,1,fp8,fp8,0,6.605557123819987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,6.824703852335612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,6.934234619140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,5.946240107218425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,7.196250915527344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,6.986549377441406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,5.980261484781901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,7.109338760375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,7.123477300008138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,6.000815709431966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,3.798762639363607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,3.8214505513509116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,3.401930809020996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,3.5214452743530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,3.619701385498047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,3.0492372512817383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,3.4881652196248374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,3.568842569986979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,3.0567731857299805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,3.525082588195801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,3.506394704182943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,1.9503679275512695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,3.270298639933268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,1.7814240455627441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,2.0177653630574546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,1.8090027173360188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,1.874527931213379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,1.6080959637959797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,1.8643199602762859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,1.8213440577189128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,1.612613360087077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,1.8218080202738445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,1.8172106742858887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,1.626512050628662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,1.0764053662618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,0.97215469678243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,0.9865919748942057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,1.045034646987915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,0.9880373477935791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,0.9886613686879476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,0.8890986442565918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,0.9907039801279703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,0.8917226791381836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,0.9974079926808676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,0.9962133566538492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,0.8946560223897299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,0.6113386551539103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,0.6230080127716064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.5687146584192911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,0.5780746539433798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,0.5832533439000448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.5299893220265707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,0.5806080102920532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.5319679975509644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,0.583786686261495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,0.5824319918950399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.5316586494445801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,0.5884639819463094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,fp8,0,8.756570816040039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,float16,0,8.728591918945312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,7.591056187947591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,7.625024159749349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,float16,0,8.843178431193033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,fp8,0,9.14077885945638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,float16,0,8.90000025431315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,7.664693196614583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,fp8,0,9.06000010172526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,4.898730595906575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,5.002095858256022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,4.317071914672852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,4.312944094340007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,3.8236106236775718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,4.555610656738281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,4.290810585021973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,3.8336000442504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,4.390469233194987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,4.338042577107747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,3.8612000147501626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,4.493493398030599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,2.427231947580973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,2.514421304066976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,2.2282773653666177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,2.2064639727274575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,1.9761813481648762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,2.2632373174031577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,2.236266613006592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,1.9872320493062336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,2.2471946080525718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,2.2394399642944336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,2.053424040476481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,1.2794186274210613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,1.309648036956787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.189029296239217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,1.1811786492665608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,1.1747626463572185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,1.0540640354156494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,1.1755092938741047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,1.1867893536885579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,1.0567999680836995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,1.1877919832865398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,1.185647964477539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,1.0689386526743572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,0.7040586471557617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,0.7239573001861572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.6521120071411133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,0.6573706865310669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,0.6563200155893961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.5938400030136108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,0.6556746562321981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,0.6603039900461832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.5946400165557861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,0.6609760125478109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,0.662282665570577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.5980426470438639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.4187999963760376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.426144003868103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.3924479881922404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.3919626474380493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.39185599486033124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.36348267396291095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.3930399815241496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.39441601435343426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.36192532380421955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.39588268597920734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.3961973190307617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.36478400230407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,float16,0,9.54800542195638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,fp8,0,9.677685419718424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,8.4082400004069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,float16,0,9.92962646484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,fp8,0,9.572863896687826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,8.448223749796549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,float16,0,9.605226516723633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,fp8,0,9.700511932373047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,8.511322657267252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,5.305983861287435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,5.405349095662435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,4.719205220540364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,4.739407857259114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,4.177791913350423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,4.7543840408325195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,4.823786735534668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,4.196394602457683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,4.757898648579915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,4.786282539367676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,4.225381215413411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,2.701077461242676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,2.7461280822753906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,2.5633813540140786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,2.40611203511556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,2.414186636606852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,2.1335253715515137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,2.412010669708252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,2.4232053756713867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,2.1402133305867515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,2.4231467247009277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,2.4371360143025718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,2.153872013092041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,1.3980159759521484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,1.4295946756998699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,1.2820533116658528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,1.253717343012492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,1.2588213284810383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,1.114367961883545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,1.258464018503825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,1.2645760377248128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,1.1192906697591145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,1.2641653219858806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,1.2712213198343914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.1269919872283936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,0.7454506556193033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,0.7618933518727621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,0.6861279805501302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,0.6750880082448324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,0.6770239671071371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.602944016456604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,0.6794559955596924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,0.6823573112487793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.606826663017273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,0.6816960175832113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,0.6861386299133301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.6105013291041056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.42020265261332196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.4294559955596924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.3898133436838786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.3838239908218384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.386186679204305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.3490346670150757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.38629865646362305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.38765867551167804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.3494346539179484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.38838934898376465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.3903839985529582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.35260268052419025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.25522667169570923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.2600746750831604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.24174400170644125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.23491734266281128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.23546665906906128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.2198986609776815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.23451733589172363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.2354080080986023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.2201546629269918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.23665066560109457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.23846399784088135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.22402666012446085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,float16,0,6.393829345703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,6.413871765136719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,5.630869547526042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,float16,0,6.421738942464192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,fp8,0,6.452149073282878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,5.663322448730469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,6.49995231628418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,fp8,0,6.53770128885905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,5.701503753662109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,3.667738596598307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,3.7058080037434897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,3.300527890523275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,3.19979190826416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,3.212191899617513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,2.820554733276367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,3.2113119761149087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,3.2255681355794272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,2.831461270650228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,3.2329654693603516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,3.250783920288086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,2.8554986317952475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,1.851594607035319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,1.885632038116455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,1.6897172927856445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,1.6385173797607422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,1.6447146733601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,1.4461973508199055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,1.644997278849284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,1.6525279680887859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,1.4535253842671711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,1.655194600423177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,1.664698600769043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,1.4647413889567058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,0.9637653032938639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,0.9858986536661783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,0.8858826955159506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,0.8597546418507894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,0.8640480041503906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,0.7626293500264486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,0.8616000016530355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,0.865285317103068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,0.764789342880249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,0.8678293228149414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,0.8730239868164062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,0.7710613409678141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.5188800096511841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.5318933327992758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.4798026482264201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.4677813450495402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.470085342725118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.4182933171590169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.46959467728932697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.4724586804707845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.42101867993672687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.47147734959920246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.4745599826176961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.422544002532959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.2982880075772603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.30616533756256104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.2770559986432393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.26948267221450806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.27033066749572754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.24569600820541382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.2696373263994853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.27027199665705365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.2469866673151652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.27286932865778607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.27452800671259564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.24890132745107016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.18645334243774414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.1909439961115519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.1750026742617289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.16795732577641806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.1687893271446228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.15446399648984274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.16716265678405762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.1684053341547648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.15446399648984274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.16945600509643555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.1709386706352234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.15691733360290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,float16,0,7.158810933430989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,7.093194961547852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,6.5054931640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,float16,0,7.155962626139323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,fp8,0,7.194447835286458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,7.174943923950195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,float16,0,7.270613352457683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,fp8,0,7.285306930541992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,7.02564811706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,4.165813446044922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,4.0824480056762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,3.4661385218302407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,3.4666293462117515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,3.138357480367025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,3.526714642842611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,3.5894505182902017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,3.58951473236084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,3.607130686442057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,3.6502081553141275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,3.4603360493977866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,2.031914710998535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,2.0149547259012857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,1.9643360773722331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,1.7386293411254883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,1.7385226885477703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,1.5824480056762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,1.7540480295817058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,1.7551147143046062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,1.7686667442321777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,1.7751572926839192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,1.779962698618571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,1.6642506917317708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,1.0324052969614665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,1.0112160046895344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,0.9875520070393881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,0.8845280011494955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,0.8858880201975504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,0.8051253159840902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,0.890501340230306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,0.8906026681264242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,0.8155786991119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,0.9016640186309814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,0.9023146629333496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,0.8275307019551595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.5339359839757284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.5206826527913412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.5109653472900391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.46007466316223145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.4610079924265544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.4155786832173665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.46270934740702313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.46319464842478436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.41817065080006915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.46722133954366046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.4701013167699178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.43013866742451984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.2858560085296631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.27743999163309735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.26876266797383624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.24490666389465332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.24467732508977255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.2207253376642863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.249071995417277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.2478826642036438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.22380266586939493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.24733867247899374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.24728000164031982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.2283466657002767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.1604213317235311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.1545919974644979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.14874133467674255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.13362666964530945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.1337279975414276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.12129599849383037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.13447466492652893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.1330453356107076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.12136000394821167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.13614933689435324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.13622933626174927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.12617599964141846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.09455466270446777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.09037333726882935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.09037333726882935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.08092266817887624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.08053866525491078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.08102933565775554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.08132799963156383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.07236800094445546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,float16,0,6.228890736897786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,float16,fp8,0,6.129760106404622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,5.652778625488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,6.64633051554362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,float16,0,6.26093864440918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,float16,fp8,0,6.245786666870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,6.299530665079753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,float16,0,6.339103698730469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,float16,fp8,0,6.332256317138672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,6.073525110880534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,3.6752373377482095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,3.5608479181925454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,3.0177440643310547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,2.985722541809082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,2.7192532221476235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,3.052997271219889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,3.09770139058431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,3.144517262776693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,3.1615308125813804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,3.1738719940185547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,3.0048745473225913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,1.7924319903055828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,1.7717013359069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,1.745541254679362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,1.5048425992329915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,1.503328005472819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,1.361845334370931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,1.522106647491455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,1.5229546229044597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,1.5420533816019695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,1.5357972780863445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,1.552901268005371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,1.4418613115946453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,0.9034240245819092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,0.8836800257364908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,0.8757812976837158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,0.764847993850708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,0.7664480209350586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,0.693120002746582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,0.7712213198343912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,0.7705492973327637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,0.7084213097890218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,0.7803146839141846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,0.7815199693044027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,0.7160373528798422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.4684000015258789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.45578134059906006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.44947199026743573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.3957600196202596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.4002079963684082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.35660799344380695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.39946667353312176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.4008266528447469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.3608479897181193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.4028533299763997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.40243732929229736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.3693600098292033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.2495786746342977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.2432053287823995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.23918400208155313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.21163199345270792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.21080533663431802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.18926399946212769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.21317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.21333332856496176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.19108800093332926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.21358933051427206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.2145706613858541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.1956160068511963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.13730133573214212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.1341973344484965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.13172266880671182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.11426666378974915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.11427199840545654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.10150933265686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.11372799674669902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.11387200156847636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.10421866178512573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.11569600303967793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.11559466520945232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.10564800103505452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.08125333487987518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.07886399825414021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.0806826651096344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.06888000170389812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.06880533198515575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.06843199829260509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.06069866816202799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.06969599922498067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.06987200180689494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.048245335618654885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.046122665206591286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.04663999875386556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.04651199777921041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,2.7271572748819985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,2.7241973876953125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,2.5326293309529624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,2.758687973022461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,2.8029867808024087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,2.8883466720581055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,2.911264101664225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,2.9133173624674478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,2.845973332722982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,1.6562879880269368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,1.6255200703938801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,1.6224053700764973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.3765014012654622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.3746506373087566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.2397226492563884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.3876746495564778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.3896373112996419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.4309813181559246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,1.4056800206502278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,1.407466729482015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.3220053513844807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,0.8314239978790283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,0.8157760302225748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,0.8155146439870199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.7006080150604248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.6987679799397787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.6290880044301351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.7030346393585205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.7051466306050619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.6430453459421793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.712602694829305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.7153333028157552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.656986673672994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.4363413254419963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.42133867740631104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.4188106854756673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.36163731416066486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.3624639908472697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.32475199302037555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.36363200346628827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.36370666821797687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.3293439944585164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.3661653200785319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.3668479919433594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.3381919860839844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.23139200607935587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.2255573272705078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.2257066567738851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.19476799170176187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.19461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.1735360026359558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.19483733177185059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.19583467642466226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.1760693391164144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.19632534186045328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.19654399156570435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.18014399210611978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.12924800316492716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.12628266215324402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.12379200259844463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.10590933760007222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.10786666472752889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.09475200374921162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.10665599505106609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.10831466317176819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.10920000076293945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.1085599958896637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.0992746651172638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.07138133545716603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.0713973343372345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.07432533303896587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.0609493354956309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.06117866436640421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.05457599957784017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.06118933359781901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.062650665640831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.05547733108202616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.05482666691144308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.044138665000597634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.04215999941031138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.0432586669921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.04264000058174133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.02849599967400233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.5019307136535645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.4980160395304363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.3610506057739258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.5149866739908855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.5112160046895344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.547690709431966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.5421387354532878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.551045258839925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.421573321024577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,0.895471970240275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,0.8754719893137614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.7602720260620117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.7599573135375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,0.8713866869608561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.7033546765645345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.765221357345581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.7152427037556967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.7908053398132324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.7666292985280355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.7754293282826742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.7137227058410645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.4556800127029419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.4504479964574178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.45443201065063477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.3896586497624715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.39134931564331055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.3514080047607422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.39606932799021405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.39204267660776776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.3579626480738322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.3968000014623006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.3993653456370036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.365941325823466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.24167466163635254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.23543467124303183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.2372586727142334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.20538665850957236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.20723734299341837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.1854133407274882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.20771199464797974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.20638932784398398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.18827199935913086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.20969067017237344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.210698664188385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.193231999874115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.13199466466903687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.1292746663093567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.12845866878827414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.11165866255760193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.11142399907112122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.11410666505495708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.11362133423487346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.10273067156473796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.11335999766985576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.11514666676521301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.10690666238466899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.07589333256085713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.06555733581384023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.06741333504517873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.06663466493288676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05962666869163513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.04341333111127218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.04308799902598063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.04062933226426443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.040762667854626976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03687999894221624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,0.964464028676351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,0.9634826978047689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,0.8903253078460693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,0.9710986614227295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,0.9793813228607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,0.9470400015513102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,0.9792106946309408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,0.9788586298624674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,0.9098773002624512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.5581706762313843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.557808001836141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.5424319903055826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.49017600218455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.49720533688863117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.450111985206604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.49300265312194824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.49456000328063965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.4580800135930379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.5101866722106934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.49703999360402423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.46061865488688153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.28994133075078327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.28996266921361286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.28379732370376587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.2559093236923218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.25628799200057983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.23437867561976114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.25705599784851074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.25649599234263104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.23682133356730142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.2596000035603841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.2592373291651408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.24127467473347983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.15439466635386148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.15294399857521057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.15272000432014465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.1384213368097941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.13857600092887878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.1251359979311625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.13792533675829569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.1378613313039144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.12613333264986673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.13884799679120383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.13929599523544312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.13006400068600973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.08744000395139058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.08587732911109924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.0879306693871816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07876266539096832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.06867200136184692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07715733349323273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.07702933251857758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07867200175921123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.07876266539096832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.07043733199437459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.05020800232887268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.049738665421803795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.04730133215586344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.0473280002673467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.04283200204372406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.04817600051561991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.03124266614516576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.7057759761810303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.7048693497975668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.6592373450597128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.7053493658701578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.708570639292399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.6683733463287354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.7105387051900228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.7098399798075358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.6709280014038086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.40462398529052734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.39058132966359455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.38682134946187335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.36297067006429035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.36138665676116943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.33767998218536377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.363103985786438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.3634026845296224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.341599980990092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.36517333984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.36509867509206134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.3454879919687907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.20613867044448853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.2038559913635254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.2037013371785482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.1908586621284485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.18969599405924478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.17493333419164023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.19076265891393027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.19026132424672446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.17871999740600586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.19171200195948282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.19157866636912027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.18238399426142374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.11327466368675232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.11243200302124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.11382933457692464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.10403199990590413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.10462400317192078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.09541333715120952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.1039680043856303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.10382933417956035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.09546132882436116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.1048906644185384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.10419199864069621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.09570133686065674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.06317333380381267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.056346664826075234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.06061333417892456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.05596266686916351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.05669866502285004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.03924266745646795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.03932799895604452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.03895466774702072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.03623466690381368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,2.2490293184916177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.576634685198466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5712426503499349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.5497706731160482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.575823982556661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.5761813322703043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.555077314376831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.5772746801376343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.5781813462575277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.5583200057347616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.31174399455388385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.31034666299819946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.30931200583775836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.29662400484085083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.2973066568374634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.2834080060323079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.2965973416964213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.29685866832733154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.28571732838948566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.29789332548777264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.29785066843032837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.2879839936892192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.1646293302377065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.16434666514396667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.1648319959640503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.1553813318411509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.15647466977437338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.14756799737612405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.1562026639779409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.15895467003186545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.14698132872581482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.15692800283432007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.15737066666285196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.1487626632054647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.08925867080688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.08668800195058186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.08700799942016602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.08764800429344177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.08269333342711131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.08708266417185466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.08155733346939087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.08689600229263306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.08699199557304382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.08118933439254761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.05266133447488149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.05005866785844167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.05202133456865946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,28.34874216715495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,29.395487467447918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,21.493087768554688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,29.608240763346355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,29.662841796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,21.78753153483073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,29.868601481119793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,29.238901774088543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,21.591232299804688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,30.3065923055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,27.584927876790363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,22.021995544433594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,15.636714935302734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,15.47335942586263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,11.417930603027344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,14.969893137613932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,14.936981201171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,10.853946685791016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,15.132816314697266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,15.160858154296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,10.901915232340494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,14.699012756347656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,15.614879608154297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,10.832186381022135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,14.455338795979818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,15.485535939534506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,10.883455912272135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,7.0478560129801435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,7.155408223470052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,5.871370951334636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,6.711008071899414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,7.170090357462565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,5.71458117167155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,7.572394688924153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,7.812090555826823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,5.6421763102213545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,7.152778625488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,7.172773361206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,5.639269510904948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,6.951536178588867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,7.359418869018555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,5.663829167683919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,3.7152748107910156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,3.663968086242676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,3.469349225362142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,3.4873921076456704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,3.5373706817626953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,3.358901341756185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,3.4048213958740234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,3.6808907190958657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,3.063157399495443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,3.5189119974772134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,3.431621233622233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,3.0495786666870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,3.579279899597168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,3.603146553039551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,3.055413246154785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,17.02149836222331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,17.567967732747395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,12.74228286743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,17.462010701497395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,17.92889658610026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,12.837594350179037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,17.39643732706706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,17.452901204427082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,12.806357065836588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,17.004047393798828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,17.851861317952473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,12.882490793863932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,9.426191965738932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,8.79958407084147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,6.873600006103516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,8.137151718139648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.04041599979003271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,8.186346689860025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,6.482826868693034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,8.00158945719401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,8.466757456461588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,6.538298924763997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,8.516522725423178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,8.251487731933594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,6.536687850952148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,8.610671997070312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,8.004159927368164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,6.547568003336589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,4.452186584472656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,4.073610623677571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,3.5779412587483725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,3.938277244567871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,3.8583412170410156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,3.4155521392822266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,4.057797431945801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,3.909008026123047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,3.4152905146280923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,4.281445185343425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,4.139957427978516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,3.4229758580525718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,3.9114561080932617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,4.242608070373535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,3.6157652537027993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,2.4024693171183267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,2.1788479487101235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,2.250330607096354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,2.110213279724121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,2.072864055633545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,2.1137919425964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,2.1806880633036294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,2.1371307373046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,2.010709285736084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,2.103333314259847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,2.0876693725585938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,1.9607307116190593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,2.1578453381856284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,2.1086079279581704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,1.939909299214681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,12.349360148111979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,11.555589040120443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,9.290634791056315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,11.433589935302734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,11.756608327229818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,9.294677098592123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,12.573866526285807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,9.286341349283854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,11.577285766601562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,12.557029724121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,9.261845270792643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,11.324612935384115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,6.603818893432617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,5.751951853434245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,5.236298561096191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,4.749871889750163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,5.874698638916016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,5.753018697102864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,5.615034739176433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,4.757376035054524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,5.414047876993815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,6.12447992960612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,5.783866882324219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,4.7904478708903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,4.757850646972656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,5.562666575113933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,6.302426656087239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,2.948981285095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,2.9909868240356445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,2.644277254740397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,2.858469327290853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,2.85480531056722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,2.5099093119303384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,2.9679412841796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,2.5110185941060386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,2.8875786463419595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,2.9117600123087564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,3.0207732518514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,2.64794127146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,2.536698659261068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,3.1609598795572915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,2.939589182535807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,1.6181599299112956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,1.637221336364746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,1.6668480237325032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,1.5610666275024414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,1.3966612815856934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,1.5357012748718262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,1.5945599873860676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,1.4259732564290364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,1.567098617553711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,1.5644639333089192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,1.5452747344970703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,1.4758987426757812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,1.646015961964925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,1.4072213172912598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,1.5494453112284343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,12.381348927815756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,15.997386932373047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,15.463333129882812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,16.102474212646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,12.492095947265625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,16.42199961344401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,12.524191538492838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,16.864400227864582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,18.03384017944336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,12.638912200927734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,16.41482671101888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,16.713802337646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,8.772064208984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,6.711434682210286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,8.58191998799642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,7.207605361938477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,6.2665971120198565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,7.175514856974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,6.343376159667969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,8.52293332417806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,7.994784037272136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,7.610517501831055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,6.276863733927409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,8.80072021484375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,7.623226801554362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,7.9845015207926435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,6.308597564697266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,3.459696133931478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,4.185338656107585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,4.646058718363444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,3.768416086832682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,3.234485308329264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,3.688144048055013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,3.989834785461426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,3.263690630594889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,3.84004815419515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,3.714757283528646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,3.259455998738607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,3.8356641133626304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,3.430368105570475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,4.01417605082194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,2.0364480018615723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,2.136949380238851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,1.9952534039815266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,2.0971892674764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,1.9374400774637859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,1.881989320119222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,1.9494187037150066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,1.9564107259114583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,1.8702027002970378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,1.9956213633219402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,2.0090719858805337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,1.777898629506429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,1.9574400583902996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,1.9734667142232258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,1.7495306332906086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,1.1569973627726238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,1.1372106870015461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,1.1045013268788655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,1.0914933681488037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,1.0926720301310222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,1.0008906523386638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,1.0989973545074463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,1.0881386597951253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,1.0092426935831706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,1.0893066724141438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,1.0915253162384033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,0.9898133277893066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,1.0932213465372722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,1.0918292999267578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,0.9899360338846842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,8.862533569335938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,9.298826853434244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,7.614725112915039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,9.842538833618164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,9.161077499389648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,7.611242930094401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,9.005365371704102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,9.55730120340983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,7.631711959838867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,9.005850474039713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,9.384549458821615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,7.664735794067383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,4.686762809753418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,5.161808013916016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,4.229743957519531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,4.865472157796224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,4.481733322143555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,3.8617172241210938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,4.675946553548177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,4.397679964701335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,3.86849053700765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,4.457711855570476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,4.49674129486084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,3.8945865631103516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,4.661557197570801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,4.943685213724772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,2.477008024851481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,3.898949305216471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,2.1889440218607583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,2.5320372581481934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,2.2684106826782227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,2.2744320233662925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,2.0148746172587075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,2.40174929300944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,2.2990986506144204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,2.531269391377767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,2.331706682840983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,2.284773349761963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,2.139824072519938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,2.321957270304362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,2.3077866236368814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,2.029520034790039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,1.2890026569366455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,1.3069600264231365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,1.1838239828745525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,1.2197173436482747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,1.2232320308685303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,1.1136319637298584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,1.2227146625518799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,1.2249759833017986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,1.101744016011556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,1.2236053148905437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,1.2280266284942627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,1.1557066440582275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,1.2311039765675862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,1.2270452976226807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,1.1094880104064941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,0.7328693072001139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,0.7420480251312256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,0.6809653441111246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,0.6999253431955973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,0.7029600143432617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,0.6412800153096517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,0.6997706890106201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,0.7004586855570475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,0.6422719955444336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,0.7033226490020752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,0.7050666809082031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,0.6449280182520548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,0.7051946322123209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,0.7077333132425944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,0.6473546822865804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,9.17365837097168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,8.925445556640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,7.8723093668619795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,9.013466517130533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,7.88215446472168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,9.3680051167806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,9.051520029703775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,9.45095443725586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,7.888346354166667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,9.397317250569662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,9.977301279703775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,4.865733464558919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,7.944565455118815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,5.111008008321126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,4.3761491775512695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,4.636879920959473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,4.456063906351726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,3.9260212580362954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,4.536618550618489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,4.574351946512858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,3.9360853830973306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,4.4981387456258135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,4.586463928222656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,3.9527359008789062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,4.527050654093425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,4.529210726420085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,3.983029365539551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,2.4920907020568848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,2.5268747011820474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,2.3766239484151206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,2.2779787381490073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,2.2618613243103027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,2.0198559761047363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,2.29475736618042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,2.2864960034688315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,2.058629353841146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,2.2762880325317383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,2.2881919542948403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,2.0365920066833496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,2.386693318684896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,2.3042027155558267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,2.0440212885538735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,1.317402680714925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,1.3211572964986165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.2233440081278484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,1.2046613693237305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,1.2225440343221028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,1.1125600337982178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,1.1981600125630696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,1.2072959740956624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,1.0778506596883137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,1.2103626728057861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,1.1995573043823242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,1.1036746501922607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,1.2041813532511394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,1.2318933010101318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,1.0968266328175862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,0.7114720344543457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,0.7263147036234537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,0.6559040149052938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,0.6603039900461832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,0.6628053188323975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.6017493406931559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,0.6611200173695883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,0.6634666522343954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.6028906504313151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,0.6640373468399048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,0.666096011797587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.6038026809692383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,0.6666826407114664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,0.6688000361124674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.607093334197998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.41810135046641034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.42396267255147296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.39161066214243573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.3907626469930013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.3901013135910034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.3633066813151042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.39081064860026044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.39075732231140137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.36563201745351154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.39181868235270184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.3945759932200114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.3657919963200887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.3974986473719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.39796264966328937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.3675626516342163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,5.590000152587891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,5.6758772532145185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,5.005237261454265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,5.623237609863281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,5.835717519124349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,5.036266644795735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,5.65725835164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,5.046298662821452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,5.831989288330078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,5.793013254801433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,5.110170682271321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,3.166293462117513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,3.2283573150634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,2.863674799601237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,2.833317438761393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,2.8466774622599282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,2.6482933362325034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,2.8335307439168296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,2.8396854400634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,2.6419572830200195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,2.8643627166748047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,2.8788534800211587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,2.5460640589396157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,2.861845334370931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,2.8661972681681314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,2.5605600674947104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,1.615898609161377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,1.678010622660319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,1.518602689107259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,1.4749493598937988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,1.4655040105183919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,1.3118986288706462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,1.4651625951131184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,1.4790879885355632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,1.3171839714050293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,1.4706133206685383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,1.4790132840474446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,1.3214506308237712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,1.4756959279378254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,1.4942720731099446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,1.332149346669515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,0.8552800019582113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,0.8727946281433105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,0.79093337059021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,0.7809279759724935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,0.7845919926961263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,0.7074080308278402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,0.7842773596445719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,0.7898293336232504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,0.7068533102671305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,0.7816853523254395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,0.792949358622233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,0.7105759779612223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,0.7893119653066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,0.7920053005218506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,0.7143306732177734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.476421316464742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.485973318417867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.445029338200887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.4387306769688924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.4400533437728882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.40400532881418866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.44124265511830646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.4428746700286865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.4050186475118001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.44255467255910236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.44357868035634357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.40434666474660236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.4452960093816121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.44734398523966473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.4089813232421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.28730666637420654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.2931999961535136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.2728479901949565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.2635573347409566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.265669325987498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.24864532550175986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.2648373246192932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.2646239995956421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.24845333894093832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.26502933104832965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.26708799600601196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.2510133385658264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.26926400264104206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.27051732937494916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.2529013355573018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,6.190656026204427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,6.1956532796223955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,5.5408376057942705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,6.200122833251953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,6.217162450154622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,5.548447926839192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,6.2080637613932295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,6.237093607584636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,5.570149103800456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,6.420757293701172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,6.283391952514648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,5.652981440226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,3.4961493810017905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,3.544330596923828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,3.194352149963379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,3.0582399368286133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,3.0681279500325522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,2.755728085835775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,3.0770079294840493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,3.081424077351888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,2.7620534896850586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,3.0978078842163086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,3.1071093877156577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,2.783461252848307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,3.105226516723633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,3.1286452611287436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,2.8064212799072266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,1.7736907005310059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,1.813045342763265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,1.6338879267374675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,1.5641066233317058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,1.5701227188110352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,1.4094239870707195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,1.5678772926330566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,1.5750239690144856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,1.4154826800028484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,1.5769920349121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,1.584181308746338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,1.4245279630025227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,1.590021292368571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,1.5985387166341145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,1.4372906684875488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,0.922714630762736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,0.9409386316935221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,0.8568053245544434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,0.8229119777679443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,0.8268427054087321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,0.7456639607747396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,0.8235146999359131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,0.8274827003479004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,0.7449333667755127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,0.8257386684417725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,0.8309866587320963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,0.7491839726765951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,0.8343626658121744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,0.8390026887257894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,0.7553919951121012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.49556267261505127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.5077386697133383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.46397864818573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.4474453528722127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.449343999226888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.40830934047698975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.4472053448359172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.44977064927419025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.40855999787648517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.4495573441187541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.4527573188145955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.4113599856694539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.45263465245564777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.456005334854126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.4140640099843343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.28507200876871747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.2916799982388814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.26934399207433063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.2535253365834554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.25516800085703534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.23880000909169516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.2550826668739319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.25704000393549603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.23974400758743286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.25835732618967694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.26017600297927856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.24084800481796265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.2600053350130717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.26260799169540405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.2427039941151937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.17747733990351358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.18055999279022217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.1691946585973104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.1595306694507599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.16109333435694376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.15147200226783752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.159360001484553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.16014400124549866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.15198399623235068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.1609760026137034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.1623360017935435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.15221866965293884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.16035733620325723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.16192533572514853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.1534986694653829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,4.116160074869792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,4.118149439493815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,3.717967987060547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,4.118730545043945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,4.138309478759766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,3.7380053202311196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,4.147493362426758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,4.157466570536296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,3.7416321436564126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,4.182655970255534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,4.215445200602214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,3.793418566385905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,2.3736960093180337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,2.4135947227478027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,2.1877493858337402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,2.057744026184082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,2.066650708516439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,1.8590614000956218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,2.063418706258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,2.0745867093404136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,1.8648212750752766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,2.073023955027262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,2.085125287373861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,1.8764266967773438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,2.0935734113057456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,2.1068426767985025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,1.8962666193644206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,1.2139093081156414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,1.2390453020731609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.123855988184611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,1.0614133675893147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,1.0682986577351887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,0.9605120023091634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,1.0658293565114338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,1.0704747041066487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,0.9617119630177816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,1.0707306861877441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,1.0775520006815593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,0.967578649520874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,1.0819786389668782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,1.0887146790822346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,0.976576010386149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,0.6387413342793783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,0.6511946519215902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.5947786569595337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.5630506674448649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.5654613176981608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.5098026593526205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.5641493399937948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.5676480134328207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.5116000175476074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.5676853259404501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.5705600182215372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.5131893157958984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.5710986852645874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.5754239956537882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.5187733173370361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.3481546640396118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.35627734661102295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.3272533416748047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.3083253304163615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.3086453278859456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.28339733680089313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.3104213277498881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.3108479976654053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.2839146653811137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.3104213277498881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.31199999650319415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.285370667775472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.31550933917363483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.3161919911702474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.28934399286905926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.20318400859832764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.20729066928227743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.19133333365122476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.17564799388249716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.17661333084106445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.16631999611854553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.17749333381652832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.17778666814168295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.1681706706682841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.17892267306645712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.1795733372370402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.16993600130081177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.181002676486969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.18140800793965658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.17137066523234049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.12774399916330972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.12966932853062949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.12497066458066304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.11772800485293071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.11808533469835918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.11288000146547954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.11712533235549927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.11735999584197998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.11332799990971883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.11752532919247945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.11713600158691406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.11158933242162068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.1172266701857249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.11753599842389424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,4.466597239176433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,4.401653289794922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,4.135135968526204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,4.470266660054524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,4.460021336873372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,4.362042744954427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,4.527935981750488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,4.5317385991414385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,4.471391995747884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,4.611983935038249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,4.600757280985515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,4.927599906921387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,2.6415732701619468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,2.579477310180664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,2.5975359280904136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,2.1940693855285645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,2.19540802637736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,2.0798826217651367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,2.2247519493103027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,2.214831988016764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,2.1563733418782554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,2.2250026067097983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,2.254026730855306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,2.1625280380249023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,2.302453358968099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,2.2767626444498696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,2.4721226692199707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,1.3027413686116536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,1.30512531598409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.2842079798380535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.1123840014139812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.1120106379191081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,1.055344025293986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.1235040028889973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.1283199787139893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,1.0686293443044026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.1293013095855713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.1368746757507324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,1.083077351252238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.1448960304260254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.1366613705952961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.227509339650472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,0.667744000752767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,0.6604426701863607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.6519893407821655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.5768320163091024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.5709333419799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.5385493437449137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.5782719850540161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.5778559843699137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.5478826761245728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.5831520160039266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.5802719990412394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.5547840197881063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.5895839929580688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.590282678604126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.6015733480453491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.3419040044148763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.3401333491007487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.30145599444707233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.3012640078862508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.2803093393643697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.30212799708048504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.30287466446558636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.2898186643918355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.30338666836420697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.3038880030314128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.2899786631266276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.3109813332557678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.3102026581764221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.2957013249397278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.19182399908701578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.1881813406944275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.184879998366038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.16403200229008993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.1638879974683126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.1546933352947235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.1685546636581421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.1627786656220754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.15294399857521057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.1657813290754954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.16550933321317038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.15618667006492615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.1686613361040751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.16752533117930093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.1597653329372406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.10966400305430095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.10847999652226765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.10417067011197408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.09110933542251587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.0909493366877238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.09242666761080424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.09274133046468098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.085807998975118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.09299199779828389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.09309333562850952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.08582400282224019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.0942186713218689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.08960533142089844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.06436799963315327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.06302933394908905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.06361599763234456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.059861332178115845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.06041066845258077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.05950400233268738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.059258664647738137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.060645331939061485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.06112533311049143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.05625066657861074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,3.83954652150472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,3.7634080251057944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,3.5619147618611655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,3.822922706604004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,3.8090667724609375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,3.8266452153523765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,3.9417174657185874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,3.912384033203125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,3.8992106119791665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,3.980064074198405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,3.963573455810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,4.356234550476074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,2.2964746157328286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,2.2661919593811035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,2.2893226941426597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,1.8798933029174805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,1.8789866765340169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,1.7938879330952961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,1.9078240394592285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,1.9170826276143391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,1.8741332689921062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,1.9187520345052083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,1.9308746655782063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,1.9224640528361003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,1.9805493354797363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,1.964570681254069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.1702933311462402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.1345760027567546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.132485310236613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.136672019958496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,0.9558453559875488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,0.955888032913208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,0.9076853593190511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,0.9637760321299235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,0.9667253494262695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,0.9275360107421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,0.9683413505554199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,0.971226692199707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,0.9401760101318359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,0.9851946830749512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,0.9828213055928549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,1.0769279797871907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.584933320681254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.5705653429031372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.5752799908320109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.49012800057729083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.48931201299031574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.4649813175201416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.49533331394195557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.4952426751454671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.4750453233718872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.4968159993489583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.49827734629313153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.4755733410517375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.5062666734059652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.504741350809733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.5247199932734171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.3057386676470439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.29685866832733154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.2992159922917684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.25752000013987225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.2572373350461324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.24199465910593668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.2592159907023112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.2585386633872986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.24687999486923218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.2609493335088094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.2608746687571208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.25126399596532184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.2656373381614685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.2652906576792399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.2566293279329936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.166512002547582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.16328533490498862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.1622773309548696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,3.768650690714518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.14062933127085367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.14017599821090698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.1325759987036387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.14220800002415976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.14194666345914206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.13410666584968567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.14411200086275736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.14505599935849509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.13874666889508566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.09497599800427754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.09272000193595886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.07754133145014445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.07910400132338206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.07239999870459239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.07785599927107494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.07736533383528392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.07228800157705943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.0782293329636256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.07979733248551686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.0795413355032603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.08050666749477386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.07587733368078868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.055498664577802025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.05701333284378052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.050527999798456825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.05020266771316528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.05157866577307383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.04841599861780802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.05246399839719137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,1.7192479769388835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,1.713040033976237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,1.6343520482381184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,1.7317493756612141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,1.7294613520304363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,1.7205333709716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,1.7402292887369792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,1.7582987149556477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,1.778175989786784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,1.814784049987793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,1.7970186869303386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,2.017749309539795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,1.0438400109608967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,1.0287466843922932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,1.0589813391367595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,0.870416005452474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,0.8688053290049235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,0.8288053671518961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,0.871018648147583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,0.870901346206665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,0.8465119997660319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,0.8845600287119547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,0.8841333389282227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,0.8493599891662598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,0.9004746278127035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,0.8982453346252441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,1.00326935450236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.5357280174891154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.5283679962158203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.5363733371098837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.4479413429896037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.44656534989674884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.42379732926686603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.4482773145039876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.4477279980977376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.4334239959716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.45208533604939777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.4562986691792806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.43783998489379883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.46082135041554767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.46220266819000244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.49267200628916424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.28205867608388263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.2748533288637797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.27847999334335327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.2362826665242513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.23559999465942383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.22021865844726562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.23625065883000693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.23616000016530356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.22658133506774902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.23786665995915732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.23796266317367554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.22777599096298218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.24303466081619263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.24259199698766074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.23709332942962646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.15337066849072775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.15028267105420431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.15241066614786783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.12663466731707254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.1270240048567454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.11809600392977397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.1276693344116211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.12681066989898682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.11941333611806233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.12795199950536093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.12772267063458762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.12153066198031108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.1327839990456899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.13248533010482788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.12595733006795248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.0878613293170929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.08573866883913676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.0848586658636729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.07127999762694041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.06757333377997081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.07314133147398631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.07259733478228252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.07312533259391785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.07076266904671986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.048581331968307495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.049413333336512245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.05143466591835022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.04601066807905833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.04257066547870636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.04459733267625173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.042810668547948204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.04430399835109711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,0.9550560315450033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,0.9531786441802979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,0.9026400248209635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,0.9663786888122559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,0.9609119892120361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,0.9214826424916586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,0.9739306767781576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,0.9712053140004476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,0.941157341003418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,0.9939893086751302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,0.9879199663798014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,1.0605706373850505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.5729333162307739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.5628373225529989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.5704160133997599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.4865866502126058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.4841119845708211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.4598986705144246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.48919467131296795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.4896693229675293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.47259732087453205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.49401601155598956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.49473599592844647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.4729386568069458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.5058133204778036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.5031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.5193973382314047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.2942720055580139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.2898613413174947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.29517332712809247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.2536320090293884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.25309866666793823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.23919999599456787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.25339200099309284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.2543413241704305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.24450133244196573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.25658132632573444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.258026659488678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.2468106746673584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.2632799943288167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.2630239923795064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.25014932950337726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.15922133127848306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.15663466850916544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.15908799568812051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.13570132851600647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.13460800051689148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.12593600153923035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.13568533460299173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.1368053356806437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.1293706695238749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.13929599523544312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.13823999961217245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.12983466188112894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.14098667105038962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.13946666320165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.13321066896120706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.08849066495895386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.08719467123349507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.08734933535257976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.07369066774845123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06862399975458781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.07645333309968312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.07584000130494435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.0746506651242574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.07441600163777669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.06883733471234639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.07601066430409749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.07430399954319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.052426666021347046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.05188799897829691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.053530668218930565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.047824000318845115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.04701333244641622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.04629333317279816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.03057066599527995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.6292053461074829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.627888003985087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.5961173375447592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.6347093184789022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.6325013240178426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.6007253328959147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.6356000105539957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.6346133152643839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.6066879828770956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.646511991818746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.6424533526102701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.6482933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.36633066336313885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.3611253499984741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.3617493311564128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.3239519993464152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.32331732908884686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.3065173427263896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.3266400098800659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.32656000057856244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.31063467264175415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.3248639901479085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.32579733928044635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.31174399455388385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.3338293234507243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.33061333497365314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.3163573344548543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.1932106614112854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.18953067064285278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.1912426749865214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.17149867614110312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.17138665914535522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.16239999731381735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.17084799210230509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.1702079971631368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.16124799847602844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.1728000044822693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.1725226640701294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.16362133622169495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.17522666851679483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.17465599377950033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.16810667514801025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.10532800356547038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.10552533467610677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.10538666447003682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.09287466605504353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.09261866410573323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.09310400485992432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.08613333106040955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.09502933422724406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.09548800190289815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.08582933743794759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.09654933214187622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.0969546635945638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.0900266667207082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.05554133156935374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.05482666691144308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.05455466608206431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.05031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.050714666644732155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03669866671164831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.03631466627120972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.03499199946721395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.024485332270463307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.47604799270629883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.473530650138855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.4476906855901082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.4755200147628784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.4745279947916667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.4510186513264974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.4759519894917806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.4756106535593669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.4529813528060913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.4796160062154134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.481663982073466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.46002666155497235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.26791467269261676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.26477867364883423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.2604479988416036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.24614399671554565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.24648000796635947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.23203200101852417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.24761066834131876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.24621866146723428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.23256532351175943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.24859732389450073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.24702399969100952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.23462400833765665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.25065066417058307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.2490239938100179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.23824000358581543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.14109333356221518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.14037866393725076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.1320266624291738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.13301866253217062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.1225440005461375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.13186132907867432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.13086400429407755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.12273066242535909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.13355732957522073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.13184533516565958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.12149866422017415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.13241599996884665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.1323306659857432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.1267039974530538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.08077866832415263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.07424533367156982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.06946666538715363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.07438933352629344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.07393066585063934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.06869333485762279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.07477866609891255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.07572799921035767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.0702400008837382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.04128533353408178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.03109866629044215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.021909333765506744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.021946666141351063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.3910026550292969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.3875146706899007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.37452268600463867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,5.685183842976888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.39185599486033124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.39313066005706787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.3754613399505615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.3921813170115153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.38939201831817627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.3798453410466512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.39209067821502686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.39390401045481366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.38076265652974445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.2139306664466858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.21379733085632324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.21191465854644775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.20344533522923788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.20567466815312704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.19176000356674194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.20397865772247314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.20272533098856607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.1913493275642395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.20335467656453451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.2052639921506246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.19363200664520264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.2062293291091919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.20595733324686685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.11369599898656209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.19662400086720785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.11238933602968852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.11574932932853699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.11157866319020589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.10520000259081523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.11180266737937927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.11122133334477742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.10477333267529805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.11122666796048482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.11062399546305339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.10364266236623128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.11158399780591328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.11037333806355794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.1051626702149709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.06414933502674103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.06378666559855144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.06413333117961884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.060032000144322716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.03692800054947535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.020213333268960316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,21.083243052164715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,22.30076853434245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,16.522725423177082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,23.68077341715495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,22.143040974934895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,16.490933736165363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,21.803199768066406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,22.243764241536457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,12.245557149251303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,16.40451176961263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,11.456532796223959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,8.541546503702799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,11.0458984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,10.893141428629557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,8.228607813517252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,11.026725769042969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,11.1082394917806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,8.326000213623047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,11.582842508951822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,11.593029022216797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,8.445130666097006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,5.460298538208008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,5.652336120605469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,4.461866696675618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,5.397226969401042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,5.348789215087891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,4.286746660868327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,5.190869331359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,5.237706820170085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,4.292474746704102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,5.155418713887532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,5.150783856709798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,4.297626813252767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,2.8704799016316733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,2.7061440149943032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,2.8366772333780923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,2.7354294459025064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,2.615381399790446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,2.458581288655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,2.58733860651652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,2.6154774030049643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,2.3274025917053223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,2.6344213485717773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,2.6002186139424643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,2.334751923878988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,12.319722493489584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,12.145472208658854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,9.642831802368164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,12.6439577738444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,12.770533243815104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,10.010298411051432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,13.06320063273112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,12.883722941080729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,6.299925486246745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,9.720410664876303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,6.496970494588216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,5.168586730957031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,6.300448099772136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,5.6024322509765625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,4.924058596293132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,6.409370422363281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,6.468959808349609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,4.937477429707845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,6.274085362752278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,6.233023961385091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,3.0146398544311523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,5.00764274597168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,3.211109479268392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,2.718448003133138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,3.2856852213541665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,2.9988800684611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,2.8165760040283203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,3.076933224995931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,3.029535929361979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,2.608725388844808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,3.158741315205892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,3.009690602620443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,1.8061493237813313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,2.622074604034424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,1.787941296895345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,1.5020052591959636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,1.6059306462605794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,1.6282399495442708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,1.6662079493204753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,1.6172213554382324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,1.4524000485738118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,1.7996266682942708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,1.6171733538309734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,1.6136107444763184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,1.4907414118448894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,8.750544230143229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,8.184202829996744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,6.999914805094401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,9.04372787475586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,8.738810857137045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,7.07151985168457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,9.29533322652181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,9.312847773234049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,4.600879987080892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,7.109312057495117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,3.801786740620931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,4.805962562561035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,4.612730662027995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,4.718090693155925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,3.597248077392578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,4.375909487406413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,4.1265974044799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,3.6045494079589844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,4.633152008056641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,4.4811147054036455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,2.375157356262207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,3.626986821492513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,2.017189343770345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,2.4777013460795083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,2.154165267944336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,1.9212533632914226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,2.1449813842773438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,2.203711986541748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,2.080512046813965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,2.153338591257731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,2.144517262776693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,1.9287412961324055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,1.2214826742808025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,1.234239975611369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.1255573431650798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,1.1884693304697673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,1.187989314397176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,1.07969069480896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,1.193066676457723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,1.192186673482259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,1.1157120068868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,1.191381295522054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,1.2004586855570476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,1.085647980372111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,11.947232564290365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,11.479440053304037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,9.430629094441732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,12.777013142903646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,12.349899291992188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,9.434229532877604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,11.912485758463541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,12.457525889078775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,9.488410949707031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,5.998890558878581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,5.955402374267578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,5.086570739746094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,5.808933258056641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.35045866171518963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,6.198853174845378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,4.748522758483887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,5.575994491577148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,6.122912089029948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,4.784895896911621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,6.010901133219401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,5.960805257161458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,3.187274614969889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,4.801429430643718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,3.0282347997029624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,2.6330506006876626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,2.951557477315267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,2.8434292475382485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,2.8369973500569663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,3.001983960469564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,2.862288157145182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,2.7000694274902344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,2.981893221537272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,2.8645652135213218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,1.6659040451049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,2.49726931254069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,1.606485366821289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,1.4090293248494465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,1.499183972676595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,1.4809333483378093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,1.459328015645345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,1.4891893068949382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,1.523311932881673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,1.3356587092081706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,1.5267306963602703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,1.4957119623819988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,0.8677972952524821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,1.342149257659912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,0.9037706851959229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,0.8023146788279215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,0.8411626815795898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,0.8458666801452637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,0.7652693589528402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,0.8504479726155599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,0.8427306811014811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,0.7677280108133951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,0.8460480372111002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,0.8501706918080648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,0.7695093154907227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,7.293498357137044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,6.815930684407552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,5.82911491394043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,6.856896082560222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,6.996896107991536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,5.811269124348958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,7.077978769938151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,7.09437878926595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,3.524698575337728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,5.853701273600261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,3.7352320353190103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,3.199413299560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,3.477535883585612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,3.356656074523926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,2.9916054407755532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,3.56388791402181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,3.4139254887898765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,2.9797439575195312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,3.377392133076986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,3.470970789591471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,1.846842606862386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,3.2507359186808267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,1.8772266705830891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,1.6699199676513672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,1.796218713124593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,1.725279966990153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,1.5866026878356934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,1.7702560424804688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,1.7440373102823894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,1.554965337117513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,1.752570629119873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,1.7576586405436199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,0.9964906374613444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,1.5637013117472331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,1.0089279810587566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,0.9103999932607015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,0.9512106577555338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,0.941327969233195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,0.8500906626383463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,0.9431093533833822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,0.9517813523610433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,0.8536907037099203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,0.9608746369679769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,0.9498026371002197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,0.8623306751251221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,0.5678346554438273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,0.5792640050252279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.5289653142293295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,0.5521973371505737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,0.5459200143814087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.502613345781962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,0.5474986632664999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,0.5488373438517252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.5032480160395304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,0.5550560156504313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.5047306617101034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,7.0529225667317705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,6.8503468831380205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,6.040202458699544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.1393226683139801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,7.180213292439778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,6.880058924357097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,6.028287887573242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,7.156821568806966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,7.08076286315918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,3.7074718475341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,6.091541290283203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,3.863679885864258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,3.3495572408040366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,3.4505065282185874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,3.3714240392049155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,3.007648150126139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,3.3733228047688804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,3.49727471669515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,3.063802719116211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,3.4381281534830728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,3.4153226216634116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,3.052432060241699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,1.9378933906555176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,1.9285386403401692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,1.7287039756774902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,1.7728692690531414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,1.738650639851888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,1.555557409922282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,1.7422614097595215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,1.7590773900349934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,1.5710026423136394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,1.7630133628845215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,1.5790506998697917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,0.9964799880981445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,1.0128586292266846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,0.914405345916748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,0.9243520100911459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,0.9280746777852377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,0.8307466506958008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,0.9285066922505697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,0.9317653179168701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,0.8349386850992838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,0.933199961980184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,0.9374293486277262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,0.8414026896158854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.5539520184199015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,0.5621920029322306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.5092906554539999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.5164053440093994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.519045352935791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.4697920083999634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.520138661066691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.5225600004196167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.4721333185831706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.523365338643392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.5255893468856812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.4758239984512329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.32996267080307007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.3344053427378337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.3080906669298808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.3072693347930908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.308133323987325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.28753600517908734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.31036800146102905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.3108479976654053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.28892799218495685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.31276800235112506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.3137706716855367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.2911786635716756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,4.272261301676433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,4.37061341603597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,3.850010553995768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,4.401791890462239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,4.362261454264323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,3.8685919443766275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,4.458853403727214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,4.357621192932129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,3.9824746449788413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,2.394538720448812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,2.4273813565572104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,2.1858399709065757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,2.168997287750244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,2.1773386001586914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,1.9828799565633137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,2.1825760205586753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,2.1923039754231772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,2.0015573501586914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,2.2000746726989746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,2.2362987200419107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,1.977786699930827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,1.2386879920959473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,1.2626826763153076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.1393600304921467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,1.1295733451843262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,1.1352799733479817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,1.0155519644419353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,1.1384960015614827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,1.1431199709574382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,1.0217706362406414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,1.14464537302653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,1.1513226826985676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,1.0303306579589844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,0.6635146538416544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,0.6758453051249186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.6104373137156168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,0.6093173424402872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,0.6126986742019653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.5496906836827596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,0.6131733258565267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,0.6153226693471273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.5520319938659668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,0.6174879868825277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,0.6207146644592285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.5573866764704386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.3757813374201457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.38230931758880615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.3476159969965617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.3461600144704183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.34654398759206134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.31754134098688763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.34938665231068927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.3492533365885417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.31852267185846966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.35155200958251953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.35304534435272217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.3210986653963725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.23115734259287515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.23470399777094522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.21760000785191855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.2119040091832479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.21125332514444986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.19732799132665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.21198934316635132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.21266132593154907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.19954133033752441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.21475734313329062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.21546665827433267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.2015519936879476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,4.813136100769043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,4.820351918538411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,4.277429262797038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,4.778175989786784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,4.815418561299642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,4.29531192779541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,4.827669461568196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,4.8838240305582685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,4.357898712158203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,2.6650293668111167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,2.72981866200765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,2.4422879219055176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,2.374000072479248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,2.3846559524536133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,2.129818598429362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,2.387274742126465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,2.3982507387797036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,2.1433067321777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,2.414384047190348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,2.4305386543273926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,2.173093318939209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,1.3644213676452637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,1.3870399792989094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,1.2515733242034912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,1.2217013041178386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,1.2260533173878987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,1.0941973527272542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,1.2288320064544678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,1.2344373067220051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,1.1022613048553467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,1.239967981974284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,1.2489066918690999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,1.114250659942627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,0.7138346831003824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,0.7304533322652181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,0.6598720153172811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,0.6446559826532999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,0.6480266650517782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.5794080098470052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,0.6485333442687988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,0.6509973208109537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.5825119813283285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,0.6548106670379639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,0.659173329671224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.5906240145365397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.3911999861399333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.39771199226379395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.36103999614715576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.3545440038045247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.3548853397369385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.3214186628659566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.35573867956797284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.3569386800130208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.3214346567789714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.3587199846903483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.36137068271636963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.32631466786066693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.22753600279490152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.23248533407847086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.21227200826009116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.20256533225377402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.20282133420308432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.18915732701619467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.20505066712697348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.204367995262146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.1914400060971578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.20630399386088052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.20796799659729004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.19463467597961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.14383467038472494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.14590932925542197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.13756799697875977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.13174933195114136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.13238933682441711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.1251253286997477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.1320266624291738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.1338986655076345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.12460266550381978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.13192533453305563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.13354133566220602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.12599466244379678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,3.2272265752156577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,3.241312026977539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,2.884458541870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,3.243311882019043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,3.258970578511556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,2.900069236755371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,3.283679962158203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,3.2962827682495117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,2.9350932439168296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,1.829487959543864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,1.855456034342448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,1.6764480272928874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,1.614458719889323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,1.6225013732910156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,1.443834622701009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,1.6260426839192708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,1.6344587008158367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,1.4549333254496257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,1.6406505902608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,1.653322696685791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,1.4730079968770344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,0.9411786397298177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,0.9579946994781494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,0.864687999089559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,0.83405868212382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,0.8386293252309164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,0.7469813028971354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,0.8421493371327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,0.8460746606191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,0.7529386679331461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,0.8483466307322184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,0.8554399808247884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,0.7617973486582438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.49726398785909015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.5096799929936727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.46041067441304523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.44644800821940106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.4484906593958537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.4005333185195923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.4478079875310262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.44994668165842694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.4029386838277181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.4538240035374959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.4545493523279826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.4070773522059123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.27507199843724567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.28143467505772907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.25497599442799884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.24413333336512247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.24605333805084229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.22446399927139282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.2479040026664734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.24899200598398843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.2262986699740092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.2517919937769572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.253711998462677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.22931732734044394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.162800004084905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.16551466782887778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.15275200208028158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.14294933279355368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.14458133776982626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.1323306659857432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.1479520003000895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.14867732922236124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.13210666179656982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.14560533563296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.14638933539390564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.13590400417645773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.10737066467603047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.10060266653696696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.09730666875839233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.09237866600354512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.09773332873980205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.09810133775075276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.09292800227801006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.09719467163085938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.0990559955437978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.0932373305161794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,3.5895519256591797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,3.62392520904541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,3.5069119135538735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,3.610698699951172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,3.6314878463745117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,3.5791571935017905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,3.683034578959147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,3.6979945500691733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,3.594623883565267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,2.0525973637898765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,2.052303949991862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,1.9928800264994304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,1.7469120025634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,1.7706613540649414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,1.7100159327189128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,1.752837340037028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,1.7854666709899902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,1.7636799812316895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,1.7926826477050781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,1.7943572998046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,1.7482239405314128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,1.0228266716003418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,1.0107733408610027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,0.9911200205485026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,0.8866293430328369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,0.8903893629709879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,0.8232426643371582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,0.8899146715799967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,0.8914399941762289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,0.832640012105306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,0.9083733558654785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,0.907690684000651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,0.8626453081766764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.5285066763559977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.5158720016479492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.5097493330637614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.4580639998118083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.4599626859029134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.4228586753209432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.45732800165812176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.4583679835001628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.4256693522135417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.46953598658243817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.4689226547876994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.4442933400472005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.2813599904378255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.2758293350537618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.2699413299560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.24446400006612143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.2432373364766439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.22601600488026938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.2430880069732666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.24428266286849976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.22546666860580444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.25170665979385376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.2502133250236511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.23508799076080322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.15681599577267966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.15241066614786783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.14841600259145102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.1325866679350535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.13395200173060098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.12241599957148235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.13286399841308594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.13338667154312134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.1225279966990153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.1378666659196218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.13755200306574503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.13150933384895325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.09291733304659526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.08965333302815755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.07959466675917308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.0790880024433136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.07573333382606506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.08045333127180736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.07939733564853668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.07262399792671204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.08096533517042796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.08113066852092743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.07398933172225952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.049546668926874794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.05416533350944519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.05420800050099691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,3.1199467976888022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,3.149104118347168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,3.0536108016967773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,3.118805249532064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,3.1429920196533203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,3.15885321299235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,3.201317469278971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,3.2298558553059897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,3.1227146784464517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,1.8145546913146973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,1.7849547068277996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,1.7723946571350098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,1.5105066299438477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,1.5320852597554524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,1.4904425938924153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,1.5133387247721355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,1.5419573783874512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,1.5515680313110352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,1.5518827438354492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,1.5774985949198406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,1.5511840184529622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,0.8973173300425211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,0.8817599614461263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,0.8777546882629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,0.7658240000406901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,0.7682666778564453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,0.7126133441925049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,0.7664639949798584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,0.7693119843800863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,0.7162240346272787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,0.7853919665018717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,0.7841760317484537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,0.7474506696065267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.462661345799764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.4516106843948364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.45107734203338623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.3943359851837158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.3945653438568115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.36486931641896564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.3940639893213908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.3951359987258911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.36929066975911456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.40556267897288006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.4039466778437297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.38418134053548175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.2453599969546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.23893866936365762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.2383093237876892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.2104746699333191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.20962133010228476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.19458667437235513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.21092800299326578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.20973867177963257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.19541333119074503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.21607999006907144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.21571733554204306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.20516266425450644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.13481066624323526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.13124799728393555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.13010133306185404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.11297067006429036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.11398933331171672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.10595200459162395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.11352533102035522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.11406933267911275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.10785067081451416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.11753599842389424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.1178986628850301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.07850133379300435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.07919466495513916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.06743466854095459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.06941333413124084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.062405332922935486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.06829866766929626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.06046399970849355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.0697920024394989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.06319466729958852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.04807466765244802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.04061333338419596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.03230933348337809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.03207999964555105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.367568016052246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.393631935119629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.3727359771728516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,1.3847360610961914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,1.395407994588216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.4273279507954915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,1.4261919657389324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,1.4452853202819824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.4116106033325195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,0.8199520111083984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,0.8019893169403076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,0.8201546669006348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.6959146658579508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.6959520181020101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.6477546691894531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,0.6987040042877197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,0.6978826522827148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.6501439809799194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,0.7163306872049967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,0.7129440307617188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.6892586549123129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.42551998297373456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.4153706630071004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.4207093318303426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.3585653305053711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.3586239814758301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.3329919974009196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.36103467146555585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.3598346710205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.33604268232981366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.37029866377512616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.36845866839090985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.35048532485961914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.22709866364796957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.22263999780019125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.22249066829681396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.1921280026435852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.1927573283513387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.17757866779963175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.19157866636912027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.19338667392730713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.17961599429448447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.1979680061340332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.19800533850987753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.18677866458892822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.12595199545224509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.12362133463223775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.12356799840927124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.10616532961527507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.10708799958229065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.09730666875839233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.10954667131106059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.10969066619873047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.09822932879130046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.11016000310579936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.10966933767000835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.10419733325640361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.07117333511511485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.06914133330186208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.07339199880758922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.06147199869155884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.060421332716941833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.05470933516820272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.06154133379459381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.057616000374158226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.04419733087221781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.04197333256403605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.03863999992609024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.03806400050719579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.022704000274340313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.024288001159826916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.7496159871419271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.7512426376342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.716218630472819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.7535200119018555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.7596586545308431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.7618772983551025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.7715093294779459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.7696373462677002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.7461866537729899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.44873066743214923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.44073065121968585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.4479733308156331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.3829866647720337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,2.155615965525309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.38359999656677246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.36452265580495197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.3858720064163208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.3853066762288411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.36585064729054767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.3960640033086141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.3921813170115153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.3789759874343872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.23696533838907877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.23188267151514688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.2348853349685669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.20422399044036865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.19338132937749228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.2057653268178304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.20563733577728271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.1928106745084127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.20844266812006632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.20844799280166626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.20084800322850546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.12963733077049255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.1264479955037435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.12956266601880392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.10965333382288615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.11156800389289856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.10340266426404317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.11157332857449849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.1051093339920044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.11502400040626526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.11422933141390483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.07443733513355255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.07773866752783458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.06539733211199443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.05807466804981232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.06407999992370605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.06660800178845723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.06680533289909363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.061520000298817955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.04228800038496653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.03211733450492223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.029978667696317036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.49118932088216144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.49057066440582275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.46140801906585693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.4931146701176961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.49515732129414874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.46806931495666504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.5019733508427938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.5012266635894775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.4763520161310832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.28763200839360553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.2831146717071533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.28255999088287354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.2563520073890686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.2570506731669108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.24092799425125122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.2568053404490153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.25673067569732666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.2411253253618876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.2607146700223287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.2592800060908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.2502986590067546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.15481600165367126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.15102400382359824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.15279466907183328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.13727999726931253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.1378933290640513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.12829333543777466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.13849600156148276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.13802133003870645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.12967466314633688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.1402346690495809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.13869866728782654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.13408533732096353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.08775466680526733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.08689066767692566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.08870933453241985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07746133208274841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.07681599756081899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.07070399820804596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.0786240001519521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.07682133217652638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.07824533184369405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.07854400078455608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.05106666684150696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.05019199848175049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.04878933231035868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.0481279989083608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.04532266656557719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.03448000053564707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.03297066688537598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.030202666918436687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.0322026660044988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.3654346863428752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.36457598209381104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.3463626702626546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.3641599814097087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.36498133341471356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.34557334582010907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.3699359893798828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.3686026732126872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.3531573216120402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.20779200394948324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.20615466435750326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.20340800285339355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.19170665740966797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.19321600596110025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.1795253356297811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.1921173334121704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.19138665994008383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.18145600954691568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.19324266910552979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.1935466726620992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.1860533356666565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.1134986678759257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.1130560040473938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.11333866914113362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.10342400272687276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.10435199737548828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.09519466757774353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.10517332951227824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.09530133008956909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.1053706705570221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.09759466846783955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.06052266558011373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.060517330964406334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.06238399942715963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.04219200213750204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.03915733347336451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.04066666712363561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.04101866732041041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.028218666712443035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.02589333305756251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.29869333902994794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.29799999793370563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.2857439915339152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.299615999062856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.29782400528589886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.28565865755081177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.30100266138712567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.30108799537022907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.29284799098968506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.1669493317604065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.16453333695729574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.16617066661516824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.1570133368174235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.15665066242218018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.14864533146222433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.1578933298587799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.15848533312479654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.14852799971898398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.1585599978764852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.1586186687151591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.15040533741315207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.090938667456309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.08945600191752116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.08682133754094441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.08700266480445862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.08702400326728821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.08285866677761078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.08877333005269368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.08868799606959026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.08764800429344177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.08295466502507527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.05426133175690969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.054714664816856384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.05297600229581197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.05231466889381409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.05251200000445048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.050442665815353394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.05427733560403188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.050255998969078064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.05300266544024149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.05342933535575867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.034186666210492454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.03414933383464813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.033861334125200905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,17.866143544514973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,17.925477345784504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,13.578960418701172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,19.146116892496746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,19.6145757039388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,13.63742446899414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,18.855103810628254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,19.394507090250652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,0.5565013488133749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,13.726842244466146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,8.636677424112955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,9.24351437886556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,7.10853894551595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,9.246533075968424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,8.723391850789389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,7.149562835693359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,8.936426798502604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,9.44540278116862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,6.902682622273763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,9.24826685587565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,8.92910385131836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,6.972714742024739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,4.310842514038086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,4.339328130086263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,3.9646294911702475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,4.114432017008464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,4.038463910420735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,3.616586685180664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,4.245354652404785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,1.7765919367472331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,4.3061173756917315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,3.6039253870646157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,4.323727925618489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,3.6010611852010093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,4.360058784484863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,2.2904532750447593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,2.253760019938151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,2.0138452847798667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,2.232837359110514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,2.1912906964619956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,2.221834659576416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,2.254192034403483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,2.0025280316670737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,2.308565298716227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,2.2529279390970864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,2.1936532656351724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,9.77729606628418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,10.6125119527181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,8.062474568684896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,10.659919738769531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,10.176544189453125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,8.061994552612305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,10.953978220621744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,10.531951904296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,5.41653315226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,8.074746449788412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,5.433893203735352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,4.318671862284343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,5.275781313578288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,4.889055887858073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,4.191962560017903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,5.517824172973633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,4.126650810241699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,5.209184010823567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,5.296335856119792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,4.144069353739421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,2.6266345977783203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,2.5514559745788574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,2.485760052998861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,2.468714714050293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,2.440528074900309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,2.2811573346455893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,2.4627787272135415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,2.456650733947754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,2.493290742238363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,2.6963678995768228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,2.4669334093729653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,2.4227147102355957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,1.3863840103149414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,1.3951999346415203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,1.3272159894307454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,1.3489333788553874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,1.357925256093343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,1.2303679784138997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,1.3716905911763508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,1.3522399266560872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,1.2199573516845703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,1.3618772824605305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,1.3892319997151692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,1.2508906523386638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,6.9527943929036455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,7.481744130452474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,5.834357579549153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,7.378362655639648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,7.151498794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,5.834005355834961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,7.378106435139974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,7.389455795288086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,5.905242919921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,3.6052160263061523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,3.8187999725341797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,3.336101214090983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,3.407007853190104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,3.596405347188314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,2.996917406717936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,3.459744135538737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,3.468485196431478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,3.379455884297689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,3.545834541320801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,3.68233585357666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,3.05293337504069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,1.8685706456502278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,1.9677119255065918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,1.700090726216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,1.870682716369629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,1.8106080691019695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,1.6061280568440754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,1.8070400555928547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,1.8090933163960774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,1.7100586891174316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,1.8137599627176921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,1.8119254112243652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,1.7187412579854329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,1.047279993693034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,1.0465333461761475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,0.9507733186086019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,1.0072853565216064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,1.0107946395874023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,0.9122186501820883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,1.0124586423238118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,1.0194666385650635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,0.9122186501820883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,1.0121173063913982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,1.016096035639445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,0.918341318766276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,9.189135869344076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,9.524325052897135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,7.8514556884765625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,10.775680541992188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,10.226767857869467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,7.860879898071289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,10.422058741251627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,10.445440292358398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,7.8999678293863935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,5.54972775777181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,5.256527900695801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,4.236949284871419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,4.497983932495117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,4.6343733469645185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,3.965167999267578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,4.907786687215169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,4.7439572016398115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,3.9770399729410806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,4.872458775838216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,4.890165328979492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,4.013210614522298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,2.433605353037516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,2.5515146255493164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,2.254533290863037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,2.426618734995524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,2.4255626996358237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,2.2648746172587075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,2.3316853841145835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,2.322890599568685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,2.217594623565674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,2.341503938039144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,2.3437280654907227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,2.084378719329834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,1.302730639775594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,1.3178719679514568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,1.1862506866455078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,1.2502773602803547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,1.2547626495361328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,1.119157314300537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,1.2519520123799641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,1.2559786637624104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,1.1203467051188152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,1.26636799176534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,1.2641706466674805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,1.1298240025838215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,0.7360373338063558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,0.7693546613057455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,0.6780107021331787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,0.7139093081156412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,0.7168640295664469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,0.6485546827316284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,0.7168906529744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,0.7186293601989746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,0.650325338045756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,0.7189919948577881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,0.7227359612782797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,0.6541386842727661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,5.923562367757161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,5.929162979125977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,4.817925453186035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,5.684047698974609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,5.724271774291992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,4.863909403483073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,6.236677169799805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,5.683274586995442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,4.878602663675944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,2.9827893575032554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,3.051157315572103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,2.66864013671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,2.7890453338623047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,2.781050682067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,2.69706662495931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,2.9813547134399414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,2.834405263264974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,2.4800267219543457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,2.889098803202311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,2.9543145497639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,2.48907740910848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,1.538383960723877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,1.5577866236368816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,1.46234130859375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,1.4540640513102214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,1.5134453773498535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,1.3662346204121907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,1.4641067186991374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,1.4657546679178874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,1.3293386300404866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,1.4663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,1.47107728322347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,1.3077759742736816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,0.8360426425933838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,0.848037322362264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,0.765338659286499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,0.7975733280181885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,0.798255999883016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,0.7154026826222738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,0.7996586958567301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,0.8012906710306803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,0.7181333700815836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,0.8038986523946127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,0.8071680068969727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,0.722762664159139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.4859893321990967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.49350400765736896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.450981338818868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.46610132853190106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.4666080077489217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.42501334349314374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.46836801369984943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.4702506860097249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.4267093340555827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.4724106788635254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.47417600949605304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.4289226531982422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,5.8211415608723955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,6.070570627848308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,4.997509320576985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,5.862602869669597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.20364266633987427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,5.808207829793294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,5.004554748535156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,5.98307736714681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,5.866394678751628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,5.062165260314941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,3.090245246887207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,3.1580479939778647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,2.840223948160807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,2.8201545079549155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,2.835621198018392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,2.5170346895853677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,2.883381207784017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,2.873407999674479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,2.8073813120524087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,2.8700478871663413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,2.742944081624349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,2.8838879267374673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,1.584181308746338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,1.6234505971272786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,1.4375146230061848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,1.549056053161621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,1.314639965693156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,1.4654134114583333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,1.4725440343221028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,1.4708107312520344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,1.306058645248413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,1.481119950612386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,1.4945386250813801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,1.3191413084665935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,0.8440213203430176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,0.8549280166625977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,0.7640799681345621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,0.7826933066050211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,0.6974826653798422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,0.7862559954325358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,0.7850560347239176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,0.7905653317769369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,0.6997386614481608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,0.793994665145874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,0.7942773501078287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,0.7074613571166992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.46980265776316327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.4796533187230428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.42982399463653564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.4411146640777588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.4395466645558675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.396666685740153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.4421546856562297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.4445173343022664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.3970239957173665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.447711984316508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.4483199914296468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.40275200208028156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.28499199946721393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.2898453275362651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.2627786596616109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.2632159988085429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.26427199443181354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.24179200331370035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.2633226712544759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.26495466629664105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.24438933531443277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.26708799600601196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.26851733525594074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.24940266211827597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,3.6127360661824546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,3.615034739176432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,3.199743906656901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,3.6224374771118164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,3.649717330932617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,3.214399973551432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,3.702437400817871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,3.698373476664225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,2.0824267069498696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,3.2572959264119468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,2.1004640261332193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,1.8180425961812336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,1.8645386695861816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,1.8330453236897786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,1.6174826622009277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,1.8512214024861653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,1.8497920036315918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,1.694218635559082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,1.866773287455241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,1.8648266792297363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,1.0441653728485107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,1.670453389485677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,1.063370704650879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,0.9618879954020182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,0.9534986813863119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,0.9616906642913818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,0.8477919896443685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,0.9648799896240234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,0.964576005935669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,0.854032039642334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,0.9702719847361246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,0.9764053026835123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,0.8631412982940674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.5615946849187216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,0.5726613203684489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.5123253266016642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.518725315729777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.5211626688639323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.4630346695582072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.5203146537144979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.5268213351567587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.4636853138605754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.5287040074666342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.5299199819564819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.3225546677907308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.47064534823099774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.3275306622187297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.2953386704126994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.2945280075073242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.29548799991607666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.2680160005887349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.29758399724960327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.29707199335098267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.27115732431411743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.30073599020640057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.3048373262087504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.2735893328984578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.20124799013137817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.20362667242685953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.1851466695467631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.1845866640408834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.1846239964167277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.1689173380533854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.18557866414388022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.16896533966064453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.18573333819707236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.18707732359568277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.170415997505188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,4.061322530110677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,4.052768071492513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,3.5428905487060547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,4.082922617594401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,4.069664001464844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,3.556698799133301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,4.13867727915446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,4.132927894592285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,3.6166292826334634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,2.254949410756429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,2.283909320831299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,2.0543786684672036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,2.013733386993408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,2.022496064503988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,1.7706507047017415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,2.022576014200846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,2.0329813957214355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,1.7823519706726074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,2.0479466120402017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,2.0608320236206055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,1.8104747136433919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,1.1552533308664958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,1.1737706661224365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,1.0449493726094563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,1.0357120037078857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,1.0386666456858318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,0.9105386734008789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,1.041424036026001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,1.0473919709523518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,0.9181866645812988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,1.054975986480713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,1.0620319843292236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,0.9317333698272705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,0.6085066795349121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,0.621503988901774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.5543893178304037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.5503146648406982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.5523840188980103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.48476799329121906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.552069346110026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.5553599993387858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.48863999048868817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.5595680077870687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.5625706513722738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.49460268020629883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.33326399326324463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.34073599179585773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.30616533756256104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.3025226593017578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.3040906588236491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.2715573310852051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.3035093347231547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.30567999680836994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.27191466093063354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.3080000082651774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.3113386631011963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.277402659257253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.19554666678110758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.20027732849121094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.18142932653427124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.17499732971191406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.17522666851679483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.1588159998257955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.17704000075658163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.17509865760803223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.160970667997996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.1786293387413025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.18147200345993042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.16619732975959778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.12471466263135274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.12593600153923035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.11768533786137898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.11622933546702068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.11733333269755046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.10774933298428853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.11553066968917847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.11571733156840007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.1162453293800354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.11797333757082622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.10942400495211284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,2.727253278096517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,2.7373600006103516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,2.3829545974731445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,2.7457920710245767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,2.7592267990112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,2.40447998046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,2.7864694595336914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,2.8041280110677085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,2.4489386876424155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,1.548144022623698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,1.5714772542317708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,1.3970239957173665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,1.374224026997884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,1.3808213869730632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.201855977376302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,1.3818292617797852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,1.39191468556722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.2135573228200276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,1.403450647989909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,1.413589318593343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.233738660812378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,0.7985653082529703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,0.8145226637522379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,0.7229066689809164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,0.7100479602813721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,0.714026689529419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.6237546602884928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,0.7191946506500244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,0.7237760225931803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.6322400172551473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,0.7262453238169352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,0.7332906723022461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,0.6406506697336832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.4244106610616048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.4355306625366211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.3882879813512166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.3816320101420085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.3837653398513794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.3360746701558431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.3834293286005656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.38648533821105957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.338858683904012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.38948798179626465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.39213331540425617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.34534935156504315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.23676266272862753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.2432373364766439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.2176213264465332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.21116799116134644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.21147199471791586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.19131199518839517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.21250667174657187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.21347200870513916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.19366933902104697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.21744000911712646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.21980265776316324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.19562133153279623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.1421173314253489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.14503467082977295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.13264000415802002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.12638399998346964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.12653332948684692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.11346667011578877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.12520533800125122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.12662933270136514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.11453333497047424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.12774399916330972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.12846400340398154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.11772800485293071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.09238933523495992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.09330667058626811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.08888000249862671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.08691199620564778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.08770666519800822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.08078933258851369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.08893332878748576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.08294933537642162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,2.936885197957357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,2.9379307428995767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,2.7077598571777344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,2.9659999211629233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,2.982794761657715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,2.908624013264974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,3.1634346644083657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,3.152565320332845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,2.92193603515625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,1.7131253878275554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,1.6818505922953289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,1.6218239466349285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,1.4815893173217773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,1.483674685160319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,1.3585920333862305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,1.4967466990152996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,1.4945972760518391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,1.3820959726969402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,1.5339520772298176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,1.5143253008524578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,1.4719146092732747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,0.8743306795756022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,0.8553067048390707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,0.8229280312856039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,0.749514659245809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,0.7501386801401774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,0.6910453637441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,0.7590400377909342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,0.7607413132985433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,0.697381337483724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,0.7722826798756918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,0.7697866757710775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,0.7100213368733724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.4551253318786621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.4455360174179077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.4265333414077759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.3898293177286784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.389354666074117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.3498133420944214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.39565332730611164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.3954613208770752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.3593333164850871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.4003359874089559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.39878400166829425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.3663626511891683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.24272000789642334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.2378186583518982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.2274186611175537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.20902933677037558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.2086133360862732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.18712000052134195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.21222933133443198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.2119093338648478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.194757342338562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.2151040037473043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.21478933095932007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.1966666579246521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.13781332969665527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.13557866215705872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.12988799810409546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.11488533020019531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.11437867085138957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.10315733154614766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,1.9589707056681316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.11603732903798421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.1067573328812917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.11979732910792033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.11784533659617107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.12035733461380005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.07612800101439159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.11055999994277954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.0749066670735677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.06790400048096974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.06884799897670746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.07014933228492737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.0705973356962204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.07070399820804596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.07008000214894612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.06247999767462412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.050986667474110924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.04907733201980591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.051498666405677795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.04606399933497111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,2.529834588368734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,2.533221403757731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,2.3380746841430664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,2.5812320709228516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,2.6152426401774087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,2.551392078399658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,2.754197438557943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,2.7453972498575845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,1.5032000541687012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,2.56712007522583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,1.483674685160319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.2792373498280842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.277359962463379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.1715892950693767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.2933119932810466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.2921280066172283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.2259360154469807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.344805399576823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.3104746341705322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.287445306777954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,0.7670453389485677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,0.7447840372721354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,0.7280426820119222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.6477760076522827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.646997332572937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.5953653256098429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.6543733278910319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.6556106805801392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.6045226653416952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.6678400039672852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.6658506790796915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.6155306498209635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.39787201086680096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.38710931936899823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.37643198172251385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.33539732297261554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.3462453285853068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.3112000028292338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.34142935276031494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.3465866645177205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.31539199749628705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.3489919900894165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.34649598598480225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.31642667452494305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.21425066391626993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.20826133092244467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.19995200634002686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.18552533785502115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.18623999754587808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.16755733887354532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.18456000089645386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.18438400824864706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.16831467549006143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.18830400705337524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.18675732612609863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.17026132345199585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.11985599994659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.11738666892051697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.11434132854143779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.1011786659558614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.09945066769917806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.10147733489672343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.10186666250228882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.09242133299509685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.1034986674785614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.10284266869227092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.09519466757774353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.068271999557813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.0666186660528183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.06737599770228068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.061946665247281395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.0611413319905599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.06039466460545858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.061834668119748436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.04404266675313314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.044533332188924156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,5.2113494873046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.038218667109807335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.04404266675313314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.038933334251244865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.031770666440327965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.030618667602539062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.030618667602539062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.157311995824178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.0683093070983887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.1709493001302083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.1680373350779216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.1367733478546143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.218784014383952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.19650133450826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.1905706723531086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,0.7041280269622803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.6856160163879395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.6760959625244141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.6021013259887695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.5836960077285767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.5419146617253622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.5923680067062378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.5885866483052572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.5484426816304525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.6053280035654703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.6033600171407064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.5614453156789144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.36447465419769287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.35499731699625653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.350383996963501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.3018186688423157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.30372800429662067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.27404266595840454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.3080853422482808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.30719467004140216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.2816480000813802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.3123733401298523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.311296006043752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.2891146739323934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.19926400979359946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.19418134291966757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.1872160037358602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.16390400131543478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.16496533155441284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.1488106648127238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.16796799500783285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.16715733210245767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.15285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.16920000314712524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.16899200280507407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.15680533647537231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.1120853324731191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.1107306679089864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.10753599802652995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.09159466624259949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.0823520024617513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.09448533256848653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.09572800000508626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.09593600034713745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.08823466300964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.06464533507823944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.05769066512584686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.05045333504676819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.026426665484905243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.02430933217207591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.6385333140691122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.6378399928410848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.5945493380228678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.6464106639226278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.6484106779098511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.6011306842168173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.6584479808807373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.6581600109736124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.6150773366292318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.3883093198140462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.37856535116831463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.3739306529362996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.32706133524576825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.3284533421198527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.29786133766174316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.3325759967168172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.33371734619140625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.30929599205652875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.3374026616414388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.3368106683095296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.3150986631711324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.20323199033737183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.19984533389409384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.19737066825230917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.1728000044822693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.17309333880742392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.15783466895421347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.17574399709701538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.17684799432754517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.16450666387875876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.1774079998334249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.1775253415107727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.16774932543436685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.11358400185902913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.11176000038782756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.09503466884295146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.0946560005346934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.08713600039482117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.09745066364606221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.09742400050163269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.08804266651471455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.0976746678352356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.09710400303204854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.09272000193595886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.06437866886456807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.06422399977842967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.056426664193471275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.05799466868241628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.0566293348868688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.05676266551017761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.026634665826956432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.02197333425283432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.019802667200565338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.41763198375701904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.4173440138498942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.38075733184814453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.4233706792195638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.4235413471857707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.391210675239563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.4268960158030192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.4251999855041504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.3965546687444051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.24684266249338785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.24276800950368246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.23847466707229614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.21832533677419028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.21797333161036173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.19951466719309488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.2208426594734192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.2223200003306071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.20692267020543417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.22270933787027994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.2222399910291036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.20789867639541626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.132560004790624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.13110933701197305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.13108799854914346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.11802132924397786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.11781866351763408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.10605866710344951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.11766933401425679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.11756267150243123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.1081119974454244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.11897599697113037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.11778666575749715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.1114026705423991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.07273066540559132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.07474133372306824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.06761066615581512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06785599887371063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.06632533172766368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.0665280024210612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.06678933401902516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.06859200199445088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.061717331409454346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.043663998444875084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.0432640016078949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.030741333961486816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.030640001098314922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.30908799171447754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.308624009291331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.28805333375930786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.3110346595446269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.311845342318217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.29474665721257526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.31224000453948975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.3126879930496216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.2967306574185689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.17742933829625449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.1755253275235494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.1750133236249288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.16275733709335327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.16332266728083292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.14999999602635702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.16299733519554138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.16267733772595724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.1523360013961792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.1645813286304474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.16478400429089865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.15612266461054483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.0946613351504008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.09519466757774353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.09572266538937886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.08912533521652222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.08892800410588582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.08823466300964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.08906666437784831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.08116266628106435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.08915199836095174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.08924266695976257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.08299200236797333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.0537013312180837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.04885333279768626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.05365333457787832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.04948266843954722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.054154664278030396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.2527466615041097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.2525013287862142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.23780266443888345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.25170665979385376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.25169599056243896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.24229333798090616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.25329599777857464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.2529226740201314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.24641066789627075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.14028799533843994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.13965866963068643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.1342080036799113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.13316266735394797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.12575466434160867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.1332533359527588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.13403200109799704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.12593600153923035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.1338879962762197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.13424000144004822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.12742933630943298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.07672533392906189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.07309333483378093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.07215466598669688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.07471466561158498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.0763679991165797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.07668266693751018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.04620266457398733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.04641066491603851
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.04719999929269155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.03284800052642822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.031013332307338715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,14.370299021402994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,14.086500803629557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,10.985482533772787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,16.083354949951172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,15.3460693359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,10.955652872721354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,14.733311971028646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,15.4978879292806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,11.080586751302084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,14.932848612467447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,15.042853037516275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,11.174116770426432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,7.620239893595378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,7.540645599365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,5.705178578694661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,6.826938629150391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,6.879509607950847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,5.577861150105794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,7.1558882395426435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,7.617520014444987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,5.539733250935872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,7.458981196085612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,7.4426930745442705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,5.49403190612793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,7.405317306518555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,6.926357269287109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,5.586544036865234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,3.374197324117025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,3.5493173599243164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,3.8515892028808594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,3.2817653020222983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,4.321429252624512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,2.882725397745768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,3.2841974894205728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,3.5404265721639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,3.2421814600626626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,3.345616022745768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,3.4334932963053384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,2.8824853897094727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,3.3341598510742188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,3.6013387044270835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,3.0216426849365234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,1.8165920575459797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,2.0171252886454263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,1.6295839945475261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,1.7848480542500813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,1.7829972902933757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,1.6963094075520833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,1.7782880465189617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,1.7927786509195964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,1.7445440292358398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,1.7944053014119465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,1.8211520512898762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,1.5882399876912434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,1.7923040390014648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,1.9273279507954915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,1.6092212994893391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,7.948597590128581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,7.996128082275391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,6.5272477467854815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,9.370725631713867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,8.628128051757812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,6.4493974049886065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,8.886688232421875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,8.624330520629883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,6.514437357584636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,8.010058720906576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,8.669488271077475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,6.632021586100261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,4.227573394775391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,4.162762641906738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.1857759952545166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,3.465914726257324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,3.7176265716552734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,4.132591883341472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,3.4961652755737305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,3.9509706497192383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,3.91593074798584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,3.3055413564046225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,3.7916905085245767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,3.8998772303263345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,3.3277012507120767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,3.9565760294596353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,3.8843679428100586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,2.015216032663981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,3.34932804107666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,2.1133012771606445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,1.8458986282348633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,2.0297279357910156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,2.023690700531006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,1.7583680152893066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,1.989792029062907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,1.9618879954020183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,1.8610347112019856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,1.9813440640767415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,2.0050293604532876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,1.8364532788594563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,2.0544106165568032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,1.980149269104004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,1.7737600008646648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,1.1150879859924316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,1.1388906637827556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,1.1200799942016602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,1.104144016901652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,1.0907999674479167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,0.9910399913787842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,1.091312011082967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,1.102778673171997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,0.9933760166168213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,1.1031200091044109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,1.0936426321665447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,0.9944213231404623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,1.096000035603841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,1.1165226300557454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,0.9998613198598226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,5.753040313720703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,5.840437571207683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,4.644698778788249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,5.3058827718098955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,5.875600179036458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,4.681568145751953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,5.443599700927734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,5.584506352742513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,4.675408045450847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,6.220591862996419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,5.823781331380208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,4.724506696065267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,2.9067840576171875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,3.226031939188639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,2.547551949818929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,2.845552126566569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,2.7685066858927407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,2.7308425903320312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,2.7047945658365884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,2.9963626861572266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,2.6090614000956216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,2.860080083211263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,2.710463841756185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,2.419520060221354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,2.718400001525879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,2.8401012420654297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,1.6275359789530437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,2.6759414672851562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,1.7577013969421387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,1.3659839630126953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,1.5520532925923665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,1.436079978942871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,1.472981293996175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,1.4411360422770183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,1.4658293724060059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,1.2980053424835205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,1.4973333676656086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,1.4458400408426921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,1.3902400334676106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,1.4510773022969563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,1.4824693997701008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,0.8943999608357748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,0.8793333371480306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,0.774399995803833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,0.8179306983947754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,0.8292906284332275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,0.7778399785359701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,0.8517599900563558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,0.8493013381958008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,0.744213342666626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,0.822773297627767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,0.8296106656392416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,0.7465386390686035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,0.8991733392079672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,0.831717332204183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,0.7498613198598226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,7.329365412394206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,7.53326416015625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,6.251349131266276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,7.582256317138672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,7.915018717447917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,6.320735931396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,7.79095458984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,7.840229034423828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,6.324133555094401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,7.983754475911458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,8.513455708821615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,6.355215708414714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,3.878218650817871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,4.327253341674805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,3.415045420328776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,3.6067145665486655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,3.618954658508301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,3.550170580546061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,3.6540959676106772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,3.943888028462728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,3.173664093017578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,3.6795199712117515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,3.631898562113444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,3.2322826385498047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,3.8982931772867837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,3.8045600255330405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,3.2154932022094727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,1.9615786870320637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,2.048917293548584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,1.791210651397705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,1.8779786427815754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,1.8878347078959148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,1.745024045308431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,1.8993919690450032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,1.8761760393778484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,1.7591946919759114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,1.8903199831644695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,1.4340160687764485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,1.948560078938802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,1.6654987335205078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,1.9029067357381184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,1.8692320187886555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,1.8829919497172039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,1.1029706796010335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,1.090341329574585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,0.9657973448435465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,1.0369333426157634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,1.005120038986206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,0.9045173327128092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,1.0084959665934246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,1.0119787057240803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,0.9071893692016602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,1.0132160186767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,1.0289066632588704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,0.9113653500874838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,1.0341386795043945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,1.025429328282674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,0.6101173162460327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,0.9173759619394938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.5551626682281494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,0.6270080010096232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,0.5759413242340088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,0.5756426652272543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.5413440068562826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,0.5767840147018433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,0.5875626802444458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.5304586489995321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,0.5744533141454061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,0.5893760124842325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.5320959885915121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,0.5874346494674683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,0.5902400016784668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.5354666709899902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,4.451482772827148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,4.586389223734538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,3.837439854939779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,4.485535939534505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,4.5758771896362305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,3.85101318359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.155349334081014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,4.468437194824219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,3.862010637919108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,4.604672114054362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,4.443605422973633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,3.9181226094563804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,4.395285288492839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,2.41921059290568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,2.252895991007487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,2.378042697906494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,2.0196480751037598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,2.1996533075968423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,2.2782559394836426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,2.213146686553955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,2.0367466608683267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,2.2146026293436685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,1.9874025980631511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,2.262213389078776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,2.3405493100484214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,2.2390880584716797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,2.1425867080688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,2.3108372688293457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,1.2367733319600422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,1.2513866424560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,1.3058133125305176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,1.1595626672108967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,1.0425866444905598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,1.1637600262959797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,1.163370688756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,1.1643733183542888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,1.0440213680267334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,1.1664586861928303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,1.1693013509114583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,1.0490773518880208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,1.1790506839752197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,1.1780640284220378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,1.0598613421122234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,0.673695961634318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,0.686730702718099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,0.6228799819946289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,0.6410346825917562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.5825493335723877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,0.643231987953186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,0.6436800161997477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.5805759827295939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,0.6454879840215048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,0.647653341293335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.5842826763788859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,0.6487413247426351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,0.6528586546579996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.5888106822967529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.3978240092595418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.4035946528116862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.3715466658274333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.37515731652577716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.37806932131449383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.3468960126241048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.3775413433710734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.3778560161590576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.34918399651845294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.3808586597442627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.38122665882110596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.35126932462056476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.38494932651519775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.3856906493504842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.3550399939219157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,4.517264048258464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,4.59933344523112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,3.9790932337443032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,4.717311859130859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,4.530490557352702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,3.98798402150472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,4.540981292724609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,4.627711931864421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,4.010341326395671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,4.500752131144206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,4.582613309224446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,4.059674580891927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,2.4583519299825034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,2.4773972829182944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,2.3235626220703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,2.2188159624735513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,2.237738609313965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,1.9926880200703938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,2.2382399241129556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,2.2664693196614585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,2.0167412757873535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,2.318938732147217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,2.274442672729492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,2.1138505935668945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,2.3255200386047363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,2.2815465927124023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,1.2621866861979167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,2.0427680015563965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,1.284021298090617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.153450647989909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,1.1714879671732585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,1.159050703048706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,1.0443893273671467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,1.1596426963806152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,1.1657813390096028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,1.0430293083190918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,1.162453333536784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,1.1757813294728596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,1.0486400127410889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,1.177466630935669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,1.184997320175171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,1.063530683517456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,0.6726293563842773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,0.6853866577148438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,0.616922656695048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,0.6206933259963989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,0.6218826770782471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.5617013374964396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,0.6234933137893677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,0.6260106563568115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.5651146570841471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,0.6255520184834799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,0.6289600133895874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.5671840111414591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,0.6313493251800537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,0.637930671374003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.5734560092290243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.3808266719182332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.38622931639353436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.35186131795247394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.3489919900894165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.34992531935373944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.3224479953447978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.3506293296813965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.3529653151830037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.3245439926783244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.35609598954518634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.3561600049336751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.3269279996554057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.359605352083842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.36130666732788086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.3298400044441223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.2334186633427938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.23697600762049356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.21954667568206787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.215338667233785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.21713600556055704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.19961067040761313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.21330666542053223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.21627734104792276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.20025600989659628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.21618133783340454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.21714133024215698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.20122667153676352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.21913599967956543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.21829867362976074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.20387732982635498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,2.80782413482666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,2.8243414560953775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,2.5393919944763184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,2.833594640096029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,2.8317600886027017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,2.552874724070231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,2.860191980997721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,2.8633705774943032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,2.5712693532307944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,2.893295923868815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,2.9016106923421225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,2.7448959350585938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,1.587930679321289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,1.6136266390482585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,1.4982719421386719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,1.4373067220052083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,1.435754617055257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,1.289562702178955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,1.444976011912028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,1.4367039998372395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,1.2956586678822835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,1.4463680585225422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,1.4557813008626301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,1.311850627263387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,1.4686026573181152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,1.4790879885355632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,0.826848030090332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,1.3281226952870686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,0.8428320089975992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,0.7655039628346761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,0.7516160011291504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,0.7555147012074789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,0.6792746384938558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,0.7582346598307291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,0.7569866975148519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,0.6823039849599203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,0.7580906550089518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,0.7636746565500895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,0.6864426930745443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,0.7705653508504232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,0.7757386366526285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,0.6987520058949789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.44914666811625165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.45763734976450604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.4158133268356323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.4094080130259196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.4095199902852376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.37349867820739746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.41234131654103595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.4146346648534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.37618664900461835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.4138880173365275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.41679998238881427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.37779200077056885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.4209866523742676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.4240373373031616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.38386134306589764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.26079465945561725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.2643040021260579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.24249066909154257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.23441600799560547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.23293334245681763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.21898667017618814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.2340959906578064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.23697600762049356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.2184000015258789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.23637332518895468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.23833600680033365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.22224533557891846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.2424160043398539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.2428320050239563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.22619199752807617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.16248533129692078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.1639946699142456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.1551253298918406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.15056000153223673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.1508639951546987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.14165332913398743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.14879467089970908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.1504639983177185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.14086932937304178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.150325338045756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.15067199865976968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.1418400009473165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.15036267042160034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.15254933635393778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.14314132928848267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,3.09825070699056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,2.7961387634277344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,3.108784039815267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,3.1109654108683267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,3.1366453170776367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,2.817450523376465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,3.154815991719564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,3.151050567626953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,2.927680015563965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,3.1927359898885093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,3.2166401545206704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,1.7669280370076497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,2.9075892766316733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,1.7878506978352864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,1.6234560012817383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,1.5558560689290364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,1.559775988260905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,1.4070720672607422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,1.5627892812093098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,1.5745654106140137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,1.4159626960754395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,1.5772746404012044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,1.580143928527832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,1.4441866874694824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,1.6008319854736328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,1.6165653864542644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,1.4569652875264485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,0.9063413143157959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,0.918783982594808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,0.8367146650950114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,0.8030400276184082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,0.8082559903462728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,0.7264320055643717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,0.8104693094889323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,0.8134559790293375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,0.7322239875793457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,0.8148693243662516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,0.8205813566843668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,0.7375946839650472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,0.8287306626637777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,0.8352266947428385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,0.7511893113454183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.4791946808497111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.4896106719970703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.4463253418604533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.42984533309936523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.42998401323954266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.39104000727335614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.43084800243377686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.4331306616465251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.3917493422826131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.43383999665578205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.43693331877390545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.3948640028635661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.44274667898813885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.4447733163833618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.4023626645406087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.2654026746749878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.2714879910151164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.24897066752115884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.23646400372187296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.2359679937362671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.2202826738357544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.23611734310785928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.23921600977579752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.22157333294550577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.24158400297164917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.2425866723060608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.2227733333905538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.24566400051116943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.24660799900690714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.22791467110315958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.157642662525177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.16061333815256754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.15057599544525146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.13873599966367087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.1400320033232371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.14030399918556213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.14050133029619852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.13116799791653952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.14011733730634054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.14085867007573447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.143994669119517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.14442666371663412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.13645333051681519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.0993280013402303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.10227200388908386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.09993066390355428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.09735467036565144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.09761599699656169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.09315199653307597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.09579199552536011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.09622933467229207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.09161067008972168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.0952959954738617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.09707199533780415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.09293867150942485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.09559999903043111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.09716799855232239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.09325333436330159
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,2.0869439442952475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,2.094106674194336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,1.8871466318766277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,2.095296065012614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,2.1058506965637207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,1.9001653989156086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,2.1146079699198403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,2.127562681833903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,1.919007937113444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,2.1526452700297036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,2.168522675832113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,1.9720533688863118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,1.206005334854126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,1.2248586813608806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.1184319655100505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,1.0526293118794758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,1.0585227012634277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,0.9534080028533936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,1.0573653380076091
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,1.0637119611104329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,0.9599839846293131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,1.0697920322418213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,1.0762133598327637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,0.9710506598154703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.0885120232899983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.0977599620819092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,0.9928320248921713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,0.6251306533813477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,0.6363146702448527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.5801493326822916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.5494186480840048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.5530133247375488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.49876264731089276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.5533226728439331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.5567839940388998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.5006399949391683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.5566346645355225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.5623626708984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.5069013436635336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.5682613452275594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.5731146732966105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.5175786813100179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.33475732803344727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.3429226477940877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.31414933999379474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.2953919967015584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.2957119941711426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.2712799906730652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.29796266555786133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.29918400446573895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.27342400948206586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.30024532477060956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.30215466022491455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.27542932828267414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.3062346577644348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.31034133831659955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.281551996866862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.18889067570368448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.19346133867899576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.17814399798711142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.16246933738390604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.16385066509246826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.15436800320943198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.16366400321324667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.16455466548601785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.15477333466211954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.1670186718304952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.16885866721471152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.15839466452598572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.1710240046183268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.17258665959040323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.1607253352801005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.11544533570607503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.10957333445549011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.10146666566530864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.10334933797518413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.09317333499590556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.10114666819572449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.09519466757774353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.10300266742706299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.09520000219345093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.10415466626485188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.09817066788673401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.07533333202203114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.07318933308124542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.07283733288447063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.07297599812348683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.07302399973074596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.07306133210659027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.06902400155862172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.07311466832955678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.07287999987602234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.06842133402824402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.07285866638024648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.07091733316580455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,2.2649973233540854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,2.245680014292399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,2.1219894091288247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,2.3271946907043457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,2.2797813415527344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,2.20197327931722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,2.440394719441732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,2.4261652628580728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,2.61789337793986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,2.4564746220906577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,2.4334452946980796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,2.573402722676595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,1.3107360204060872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,1.3085066477457683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.3046133518218994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.1423892974853516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.133834679921468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,1.0737120310465496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.1570933659871419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.1475253105163574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,1.0959733327229817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.2056907018025715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.1940373579661052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.2999359766642253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.2124799887339275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.1962186495463054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.2718613147735596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,0.6663519938786825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,0.657039999961853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,0.6563413143157959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.5872906843821207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.5833813349405924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.5481653213500977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.5937973260879517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.590170661608378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.5608319838841757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.6136746803919474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.6102666854858398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.6427839994430542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.6155253251393636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.6114346583684286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.6233866612116495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.3492480119069417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.3421386480331421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.3420373201370239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.30744532744089764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.3060533404350281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.28544000784556073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.3116533358891805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.309663991133372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.29312533140182495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.3215679923693339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.3201333284378052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.31648532549540204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.321669340133667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.32070932785669964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.312336007754008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.18936532735824585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.18577067057291666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.18365333477656046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.16550399859746298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.16523733735084534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.15361066659291586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.16711999972661337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.16526400049527487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.15847999850908914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.17414400974909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.1745120088259379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.16909867525100708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.17525867621103922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.17422932386398315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.16731733083724976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.10917866230010986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.10694400469462077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.09307733178138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.09307199716567993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.08702400326728821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.09518399834632874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.09537600477536519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.08740267157554626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.09775466720263164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.09722666939099629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.0939359962940216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.09777599573135376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.09937600294748943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.09589866797129314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.06301333506902058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.06252266466617584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.06287999947865804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.058592001597086586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.05867200096448263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.060559997955958046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.060458665092786155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.06084266801675161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.060746664802233376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.05723733206590017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.03952533255020777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,1.9413706461588542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,1.926261266072591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,1.8333813349405925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,2.0011253356933594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,1.9574346542358398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,1.9021813074747722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,2.1229920387268066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,2.1046293576558432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.3293919563293457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,2.1332319577534995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,2.1055572827657065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,2.291440010070801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.1353120009104412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.1436586380004883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.152341365814209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,0.9848960240681967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,0.9768053690592448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,0.9292426904042562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,0.9955626328786215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,0.9863573710123698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,0.948906660079956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,1.0502986907958984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,1.0382346312204997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.160154660542806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,1.0513599713643391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,1.035050630569458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.1305332978566487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.5786186854044596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.5741600195566813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.5811520020167033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.5062613487243652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.501584013303121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.4724053144454956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.512223998705546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.5089173316955566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.48677865664164227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.5290720065434774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.5263359944025675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.5678720076878866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.5312426487604777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.5285066763559977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.5449173450469971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.3022666573524475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.2980960011482239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.3007306655248006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.26395734151204425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.26184000571568805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.24638932943344116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.2688053250312805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.2665173411369324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.25251734256744385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.2763413389523824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.27534399429957074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.27908267577489215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.27877867221832275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.2769013245900472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.27447466055552167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.1641279955705007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.1613920032978058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.16193067034085593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.1422719955444336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.14200533429781595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.13215999801953635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.1444960037867228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.14284800489743552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.13611732920010886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.14999467134475708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.14970133701960245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.14679466684659323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.15009599924087524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.14910399913787842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.14640000462532043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.09478933612505595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.09332799911499023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.09268266956011455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.07898666461308797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.0746506651242574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.08084266881148021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.08060800035794576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.08331733445326488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.08107733229796092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.082997332016627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.052890668312708534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.053226664662361145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.05027199784914652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.0521066685517629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.05194666484991709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.049226666490236916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.05022933085759481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.03389333436886469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,0.8944586912790934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,0.8877759774525961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,0.847109317779541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,0.9085066318511963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,0.8943626880645752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,0.8900319735209147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,0.9617919921875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,0.9505333105723063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,1.0786399841308594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,0.966101328531901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,0.9465386867523193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.0465280214945476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.5315946737925211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.5230026642481486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.5389279921849569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.46913599967956543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.45422399044036865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.43066132068634033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.464789350827535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.4608373244603475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.44834665457407635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.49456532796223956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.4798933267593384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.5242826541264852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.485914667447408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.48237331708272296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.5202720165252686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.2804266611735026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.2760533293088277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.2821279962857564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.2393653392791748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.23969600598017374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.22606933116912842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.2446613311767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.24238399664560953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.23463465770085654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.2539680004119873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.2534079949061076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.25964800516764325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.2542346715927124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.25283199548721313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.2541919946670532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.15377066532770792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.1508906682332357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.1509706676006317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.12773866454760233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.12770666678746542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.12064533432324727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.12973333398501077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.12818666299184164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.12425067027409871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.1341600020726522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.1346879998842875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.13699199755986533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.13823466499646506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.13793599605560303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.13336533308029175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.08981866637865703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.07362133264541626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.0746666689713796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.07396266857783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.06863999863465627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.07829333345095317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.07685333490371704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.07884266475836436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.07682133217652638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.04298133154710134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.046096002062161766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.49667731920878094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.49052266279856366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.4683573246002197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.5094826618830363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.5019306739171346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.4814346631368001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.5272106726964315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.5226613283157349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5582613150278727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.5293973286946615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.5218613147735596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.5452800194422404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.2948213418324788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.29093867540359497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.29546133677164715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.25658132632573444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.25274133682250977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.24040534098943075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.2629973292350769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.2592373291651408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.24890132745107016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.27315733830134076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.27063467105229694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.27052799860636395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.27318400144577026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.2696160078048706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.2650453249613444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.15836800138155618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.15617600083351135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.1569760044415792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.13589333494504294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.13621333241462708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.1290613313515981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.14079466462135315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.14010133345921835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.13396799564361572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.14586666226387024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.14446399609247842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.14202133814493814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.14641066392262778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.14637333154678345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.1428000032901764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.08848533034324646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.08686400453249614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.08874133229255676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.07574399809042613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.07065066695213318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.0754613329966863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.07889600098133087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07912533481915791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.08088000118732452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.07912533481915791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.07824000219504039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.04612799982229868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.04208533465862274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.04747200012207031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.046485334634780884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.04663999875386556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.031925333042939506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.02179199953873952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.020213333268960316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.01775466650724411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.3299413323402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.32613333066304523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.3094506661097209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.3338133494059245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.328058660030365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.3106773296991984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.3466133276621501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.3423519929250081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.3343626658121745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.3460693359375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.340773344039917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.3336533308029175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.19221333662668863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.19051732619603476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.1895680030186971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.17250667015711466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.17052799463272095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.1622719963391622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.17291200160980225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.17208532492319742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.16446399688720703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.18128534158070883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.17916800578435263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.1747573415438334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.18198933204015097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.18007999658584595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.1748639941215515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.10667199889818828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.10492799679438274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.09458133578300476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.0932426651318868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.09522133072217305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.09444266557693481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.09020800391832988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.0990773340066274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.09717866778373718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.0949173370997111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.10103999574979146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.09889599680900574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.05023466547330221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.05425066749254862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.037434667348861694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03658666710058848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.0258240004380544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.2466933329900106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.24560532967249551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.23371734221776327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.24851733446121216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.24595733483632407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.23373866081237793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.2523253361384074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.2529813249905904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.24632533391316733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.2550879915555318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.2546773354212443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.24571200211842856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.14151466886202493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.14046933253606161
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.1386186679204305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.13152000308036804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.12380266189575195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.13247467080752054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.12381333112716675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.13541866342226663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.13582400480906168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.13132799665133157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.13530666629473367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.13607999682426453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.12989866733551025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.07743999858697255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.07647466659545898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.0748533308506012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.06845333178838094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.07242133220036824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.06879466772079468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.07447466750939687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.0706879993279775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.07547200222810109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.07702933251857758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.07293333113193512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.04667200148105621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.04612799982229868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.04237333436806997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.04606399933497111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.030597334106763203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.030640001098314922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.03183466692765554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.20378132661183676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.20381333430608115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.19421867529551187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.20399999618530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.20195732514063516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.1947093407313029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.20627733071645102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.2060799996058146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.2010400096575419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.2067626714706421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.20585066080093384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.2018773357073466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.11200533310572307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.11129066348075867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.10929066936175029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.10758933424949646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.10341866811116536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.10827733079592387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.10461333394050598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.1095306674639384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.11097066601117452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.10757866501808167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.1095306674639384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.10961600144704182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.10724799831708272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.0640533318122228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.06379200021425883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.06256533165772755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.06250666578610738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.06390933195749919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.0626933326323827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.03789866715669632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.038165333370367684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,1.40610138575236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,10.790133158365885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,10.939567565917969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,8.146661122639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,11.478917439778646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,10.975125630696615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,8.231781641642252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,11.288976033528646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,11.602010091145834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,8.372261047363281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,11.409103393554688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,11.072101593017578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,8.286005020141602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,5.161834716796875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,4.974639892578125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,4.353770573933919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,5.441680272420247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,5.34115727742513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,4.221359888712565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,5.172320048014323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,5.259402592976888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,4.201210657755534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,5.066277186075847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,5.444709142049153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,4.182741483052571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,4.9563093185424805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,4.953493436177571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,4.231482823689778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,2.728133201599121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,2.691194534301758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,2.3822506268819175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,2.5139412879943848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,2.533461411794027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,2.541205406188965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,2.516277313232422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,2.7695786158243814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,2.571232000986735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,2.5262293815612793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,2.5015625953674316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,2.2167466481526694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,2.5422186851501465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,2.5503199895222983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,2.4867307345072427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,1.3993013699849446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,1.4251413345336914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,1.2840960025787354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,1.3751840591430664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,1.3683946927388508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,1.2263147036234539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,1.3707466125488281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,1.4172479311625164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,1.2827466328938801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,0.6558613379796346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,1.3748586972554524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,1.4200159708658855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,1.282586654027303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,1.3977759679158528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,1.4080053965250652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,1.4159679412841797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,5.854330698649089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,6.319109598795573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,4.937466621398926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,6.439194361368815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,6.182528177897136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,4.891754786173503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,5.871568044026692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,6.62222417195638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,4.909562746683757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,6.776048024495442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,5.687221527099609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,4.961317380269368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,3.1526241302490234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,3.10042667388916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,2.9942665100097656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,3.2566506067911782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,3.0484533309936523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,2.639354705810547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,2.892474810282389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,3.4163360595703125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,2.5211413701375327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,3.1125707626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,2.8880160649617515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,3.060821215311686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,3.0589386622111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,2.928096135457357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,2.867168108622233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,1.6403039296468098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,1.5825759569803874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,1.559999942779541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,1.413365364074707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,1.3526986440022786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,1.5276373227437336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,1.543242613474528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,1.5312533378601074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,1.4839359919230144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,1.5346293449401855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,1.5491305987040203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,1.4375893274943035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,1.6479840278625488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,1.5710560480753581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,0.8800426324208578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,1.5853652954101562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,0.8869226773579916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,0.8011946678161621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,0.856229305267334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,0.864186684290568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,0.7736266454060873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,0.8550559679667155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,0.8581120173136393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,0.7735093434651693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,0.8590880235036215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,0.8747093677520752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,0.7774720191955566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,0.8687787055969238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,0.8673493067423502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,0.7810239791870117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,4.385823885599772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,4.605850537618001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,3.53932253519694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,4.441973368326823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,4.0742292404174805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,3.628133455912272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,4.160789489746094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,4.239775975545247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,4.115440050760905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,4.23854923248291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,4.14087454477946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,4.215418815612793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,2.332453409830729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,2.1974719365437827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,1.9475785891215007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,2.0944693883260093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,2.1020426750183105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,2.039050738016764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,2.099461396535238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,2.103333314259847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,1.8495786984761555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,2.1909119288126626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,2.112816015879313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,1.8911733627319336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,2.1203786532084146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,2.145594596862793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,1.9176479975382488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,1.3165333271026611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,1.1771519978841145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,1.0997973283131917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,1.1254346370697021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,1.1588799953460693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,1.0037439664204915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,1.1299573580423992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,1.1289386749267578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,1.0050346851348877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,1.1331946849822998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,1.1408639748891194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,1.0093706448872883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,1.139562686284383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,1.144426663716634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,1.0132319927215576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,0.6632373332977295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,0.6714293162027994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.6076906522115072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,0.6419999996821085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,0.6410026550292969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.5813493331273397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,0.6444586515426636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,0.6444480021794637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.583903988202413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,0.6491146485010783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,0.6498239835103353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.5858453512191772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,0.6515733400980631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,0.6577119827270508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.5907839934031168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,5.927824020385742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,6.0174986521403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,4.77460257212321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,5.942037582397461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,5.973498662312825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,4.763770739237468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,5.584133148193359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,5.694277445475261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,4.785322825113933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,5.9470774332682295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,5.7389068603515625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,3.1120052337646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,4.830106735229492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,3.070506731669108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.129120002190272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,2.583669344584147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,2.749482790629069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,2.415616035461426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,2.7239198684692383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,2.7729546229044595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,2.7908105850219727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,2.566282590230306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,2.9015731811523438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,2.4374186197916665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,3.0752105712890625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,2.820704142252604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,2.7890666325887046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,2.618645350138346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,1.5286293029785156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,1.6601920127868652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,1.35535462697347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,1.4782880147298176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,1.2733439604441326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,1.4597333272298176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,1.4921120007832844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,1.2773973147074382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,1.447098731994629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,1.4931947390238445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,1.2915146350860596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,1.4481760660807292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,1.5788480440775554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,1.358890692392985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,1.4636054039001465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,0.8491946856180826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,0.7431519826253256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,0.8282879988352457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,0.7851413091023763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,0.7906880378723145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,0.7014453411102295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,0.7869333426157633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,0.7890506585439047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,0.7039360205332438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,0.8034773667653402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,0.793615976969401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,0.7084480126698812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,0.8018186887105306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,0.8088586330413818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,0.72325332959493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.4813760121663411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.4407626787821452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.46298666795094806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.4503573179244995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.41627732912699383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.4529706637064616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.465338667233785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.4173813263575236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.46722666422526044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.4628053506215413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.42052265008290607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.4660266637802124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.4670026699701945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.4250826835632324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,3.3155199686686196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,3.3046134312947593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,3.275146802266439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,3.4603840510050454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,3.3421548207600913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,2.956512133280436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,3.4485225677490234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,3.3406346638997397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,2.9619626998901367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,3.5598827997843423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,3.4188159306844077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,3.207215944925944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,2.0813066164652505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,1.8331626256306965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,1.7566240628560383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,1.7490506172180176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,1.5225013097127278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,1.701306660970052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,1.7169920603434246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,1.7358026504516602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,1.6150612831115723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,1.7203466097513835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,1.775770664215088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,1.7336053848266602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,1.7312374114990234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,1.7608426411946614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,0.9714293479919434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,1.5440533955891926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,0.9814559618631998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,0.9547786712646484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,0.9016213417053223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,0.9029013315836588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,0.8161546389261881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,0.8993706703186035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,0.9058079719543457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,0.8083413441975912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,0.9106826782226562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,0.9126293659210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,0.9173546632130941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,0.8150293032328287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,0.8235840002695719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,0.9216000239054362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,0.5390293200810751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,0.5337119897206625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.4862186511357625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.5071093241373698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.5013546546300253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.4540853500366211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.5029706557591757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.4570399920145671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.5095200141270956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.5112586816151937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.46059199174245197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.5127626657485962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.5156106551488241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.463861346244812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.31959466139475506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.3243359923362732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.29552000761032104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.30001600583394367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.30056534210840863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.27342400948206586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.300165335337321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.3016800085703532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.2773653268814087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.3028159936269124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.30373332897822064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.2791999975840251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.30719467004140216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.30986666679382324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.2835200031598409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,3.4050401051839194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,3.4135252634684243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,3.031221389770508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,3.440021197001139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,3.4393491744995117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,3.0515359242757163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,3.453002611796061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,3.466304143269857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,3.0847946802775064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,3.5141385396321616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,3.6419572830200195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,3.12660280863444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,1.8839200337727864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,1.9111413955688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,1.7084852854410808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,1.7174773216247559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,1.727392037709554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,1.531050682067871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,1.73416535059611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,1.738634745279948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,1.5449387232462566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,1.7462773323059082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,1.760858694712321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,1.5754186312357585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,1.7715840339660645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,1.8405812581380208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,1.5823893547058105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,0.9934879938761393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,0.9925866921742758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,0.8902826309204102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,0.9024746417999268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,0.9090346495310465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,0.8028639952341715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,0.9034773508707682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,0.906938632329305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,0.8045653502146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,0.9154720306396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,0.915450652440389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,0.814575990041097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,0.9227413336435953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,0.9295519987742106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,0.8272319634755453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.5268906752268473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.536517341931661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.4812479813893636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.48897600173950195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.48870400587717694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.4388586680094401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.4917706648508708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.49234668413798016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.44093867142995197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.49485333760579425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.4968373378117879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.4453333218892415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.5009546677271525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.5059680143992106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.4511200189590454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.302239994208018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.3083146611849467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.2791999975840251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.2794453303019206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.27535466353098553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.25492266813913983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.2786879936854045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.2802613377571106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.2564319968223572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.2836373249689738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.2860106627146403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.261135995388031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.28883200883865356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.28919466336568195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.2630400061607361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.18804800510406494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.19030400117238364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.17602666219075522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.17552000284194946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.17610132694244385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.1632213294506073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.1765013337135315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.18078400691350302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.16526400049527487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.17742933829625449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.1768266757329305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.16292799512545267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.17696533600489298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.17882666985193887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.16548800468444824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,2.181978702545166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,2.1898667017618814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,1.9512267112731934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,2.1977173487345376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,2.2478079795837402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,2.027232011159261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,2.2236266136169434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,2.2422666549682617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,1.9867946306864421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,2.2575999895731607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,2.3002026875813804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,2.0287626584370932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,1.2324799696604412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,1.2482879956563313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,1.119605302810669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,1.1185226440429688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,1.1200586954752605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,0.9979680379231771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,1.122752030690511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,1.1263466676076253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,1.0010933081309001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,1.1307520071665447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,1.1382826964060466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,1.012613296508789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,1.1518560250600178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,1.1605599721272786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,1.0337653160095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,0.6468106508255005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,0.6591519912083944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.5914293527603149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,0.5916426579157511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,0.5939573446909586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.5302559932072958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,0.5927520195643107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,0.5961386760075887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.5322133302688599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,0.599237322807312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,0.6044853528340658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.5377973318099976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,0.6077119906743368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,0.6132853428522745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.5477013190587362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.3552853266398112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.3625013430913289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.3274506727854411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.3242986599604289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.32496533791224164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.2935466567675273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.32603200276692706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.3280959924062093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.29792000850041706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.3297226627667745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.33074132601420086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.29792000850041706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.33585067590077716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.33929065863291424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.30594666798909503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.20906666914621988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.2130133310953776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.19369065761566162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.18717867136001587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.18783466021219888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.17136534055074057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.18923733631769815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.1905440092086792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.17287999391555786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.19029333194096884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.19236266613006592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.1770240068435669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.19456533590952554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.19686400890350342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.18171199162801108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.1316373348236084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.1318826675415039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.1251146694024404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.12586667140324911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.12621333201726279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.1179093321164449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.12628266215324402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.1252906620502472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.11755200227101643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.12627733747164407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.11779733498891194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.12521066268285116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.12622933586438498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.11800000071525574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,2.436000029246012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,2.4461867014567056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,2.1629279454549155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,2.460890611012777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,2.468719959259033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,2.1989547411600747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,2.5135413805643716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,2.513360023498535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,2.2365280787150064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,2.514965375264486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,2.5498506228129068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,2.2560319900512695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,1.3712587356567383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,1.3918827374776204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.2493759791056316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,1.2199947039286296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,1.2261813481648762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,1.0893279711405437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,1.2289013067881267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,1.2338773409525554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,1.0987146695454915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,1.2422719796498616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,1.248698631922404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,1.1124053001403809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,1.267855962117513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,1.2785600026448567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.1384426752726238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,0.7090453306833903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,0.7219946384429932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,0.6490933497746786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,0.6357760032018026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,0.6388106743494669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.5667946736017863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,0.6386293172836304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,0.642245332400004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.5696213245391846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,0.6466240088144938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,0.650709350903829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.5779680013656616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,0.6585813363393148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,0.6635093291600546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.5907946825027466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.38014400005340576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.3882026672363281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.34858667850494385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.3406613270441691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.3421440124511719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.3054666717847188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.34362133344014484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.3452213207880656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.3102186719576518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.3469173510869344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.34906665484110516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.31228800614674884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.35345598061879474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.35645333925882977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.3202400008837382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.21412799755732217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.21768534183502197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.19676266113917032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.18731733163197836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.1895786722501119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.17363733053207397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.1904266675313314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.19141334295272827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.17653866608937582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.19358932971954346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.19577600558598837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.17875200510025024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.19810134172439575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.2006346583366394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.18236267566680908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.12819733222325644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.13045333822568259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.11985599994659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.11550933122634888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.11714667081832886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.10569066802660625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.11729066570599873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.11750400066375732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.11635200182596843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.11770666639010112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.10717866818110149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.11921599507331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.11967466274897258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.11221866806348164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.08542933066685994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.08119999865690868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.08337600032488506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.0837653378645579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.07684800028800964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.08276799817879994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.08329600095748901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.08468799789746602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.0848640004793803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.08433600266774495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,1.66212797164917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,1.668992042541504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,1.476469357808431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,1.679109255472819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,1.6854346593221028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,1.4909173647562664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,1.7236266136169434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,1.723903973897298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,1.5298293431599934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,1.7248640060424805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,1.73416535059611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,1.5433227221171062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,0.9516639709472656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,0.9616373380025228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,0.866533358891805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,0.838101307551066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,0.84169602394104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,0.746783971786499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,0.8429866631825765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,0.8471519947052002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,0.7526613076527914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,0.8522186279296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,0.8589653174082438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,0.7623999913533529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,0.872058629989624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,0.8784213066101074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,0.783573309580485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.49516268571217853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.502560019493103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.4516799847284953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.43984532356262207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.4418293237686157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.3927146593729655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.4432053168614705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.4442506631215413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.3949706554412842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.446394681930542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.44952531655629474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.4002506732940674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.4551466703414917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.46090133984883624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.41041068236033124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.2680480082829793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.273637334505717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.246778666973114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.23634666204452515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.2371093432108561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.21522667010625204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.23821866512298584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.23957866430282593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.2428320050239563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.243450661500295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.21926399072011313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.24901866912841797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.2508853276570638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.22618132829666138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.15382400155067444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.15572800238927206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.14205867052078247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.13144532839457193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.1323360006014506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.12060800194740295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.13335466384887695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.13430399696032205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.12240533034006755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.13521599769592285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.13590932885805765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.12592533230781555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.14043733477592468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.14120533068974814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.130730668703715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.09098666906356812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.09278933207194011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.09007466832796733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.08587200442949931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.08692800005276997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.0851093331972758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.08583466211954753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.08745066324869792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.07890133559703827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.08642133076985677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.08771733442942302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.08144533137480418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.060549333691596985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.06070933242638906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.056549335519472756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.056362668673197426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.05843733251094818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.05416533350944519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,1.7573493321736653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,1.7616267204284668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,1.6542399724324544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,1.7905012766520183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,1.8036266962687175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,1.7915786107381184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,1.8134026527404785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,1.8165547053019206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,1.8085546493530273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,1.9146025975545247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,1.9051413536071777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,1.8478612899780273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,1.0329173405965169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,1.0136746565500896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,0.9938186804453532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,0.8933493296305338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,0.8937119642893473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,0.8404800097147623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,0.9067520300547282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,0.9063359896341959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,0.8656480312347412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,0.9164533615112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,0.9170239766438802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,0.8759040037790934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,0.9486506779988607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,0.9387466907501221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,0.9222826957702637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.5315093199412028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.5204693476359049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.5105439821879069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.45981868108113605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.45866668224334717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.4302080074946086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.4664693276087443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.46535468101501465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.43511466185251874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.4723200003306071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.4722293217976888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.44684799512227374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.48441068331400555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.4814133246739705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.4596800009409587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.28325867652893066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.27543999751408893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.26945600907007855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.2443093260129293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.24270933866500854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.24784000714619955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.24730134010314941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.23040533065795898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.2500373323758443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.24841066201527914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.23401600122451782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.2577066620190938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.2558986743291219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.24224533637364706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.1556106706460317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.15268799662590027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.1479520003000895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.13351466258366904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.1339466671148936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.12180266777674358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.1339306632677714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.1353653371334076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.126720001300176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.1369653344154358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.13711999853452048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.12898133198420206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.14129066467285156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.1406773328781128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.13377599914868674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.09078933795293172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.0892693301041921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.07886399825414021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.07060266534487407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.07921599845091502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.08243200182914734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.07863999903202057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.07940799991289775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.0738613357146581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.08163733283678691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.08126933375994365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.07654400169849396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.056661332647005715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.056330665946006775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.053557331363360085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.05301866432030996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.04830400149027506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.0543093333641688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.04923733572165171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.04833599925041199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.03608000030120214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.03467733412981033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.035802667339642845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.0351200004418691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.03676266719897588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.03637866675853729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.036501333117485046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,1.5116106669108074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,1.5105387369791667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,1.432154655456543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,1.5709546407063801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,1.5879467328389485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,1.5665119489034016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,1.599562644958496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,1.6053600311279297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,1.6003093719482422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,1.6669920285542805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,1.6515307426452637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,1.6115892728169758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,0.9038026332855225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,0.8960533142089844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,0.8802506923675537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,0.7674240271250407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,0.7690719763437907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,0.7253119945526123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,0.7822879950205485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,0.7823306719462076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,0.7483200232187907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,0.7922399838765463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,0.7943627039591471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,0.778544028600057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,0.8293333053588867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,0.8156320254007975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.465770681699117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,0.8047626813252767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.45182931423187256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.4546293417612712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.396565318107605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.3950986862182617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.3734079996744792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.40358932813008624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.4036480188369751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.37941332658131915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.40966399510701496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.407968004544576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.38808000087738037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.42029333114624023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.4182666540145874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.4012746810913086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.2500373323758443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.2407146692276001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.23850667476654053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.20973867177963257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.2114773392677307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.19381866852442423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.21334399779637656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.21290133396784464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.20015466213226318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.2144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.21619733174641928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.20174400011698404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.22460800409317017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.22021333376566568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.1358560025691986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.21072532733281454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.13338667154312134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.13056533535321554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.11452800035476685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.11322666207949321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.10556800166765849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.1156213382879893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.11755200227101643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.10956799983978271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.11780266960461934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.11571733156840007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.11079466342926025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.1225279966990153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.1218986709912618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.11750933527946472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.07747733096281688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.0778186668952306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.0787306676308314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.06854400038719177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.06835733354091644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.06720533470312755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.06878933310508728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.06892266869544983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.06159999966621399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.06911999980608623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.06879466772079468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.0636106679836909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.04822400212287903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.04818666477998098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.044666667779286705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.04673066735267639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.045221333702405296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.04204800228277842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.04605866471926371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.04640533526738485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.03198933353026708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.6900693575541178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.6930346488952637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.6608959833780924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,0.7036426862080892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.7036746342976888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.688426653544108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,0.7136267026265463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,0.7126080195109049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.6890827020009359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,0.7466453711191813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,0.7392000357309977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.7521599928538004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.4274453322092692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.4164106845855713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.41811732451121014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.35448535283406574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.3529013395309448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.3409493366877238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.36315735181172687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.36101333300272626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.34782934188842773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.36666667461395264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.3657706578572591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.3553226788838704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.38070933024088544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.3781546751658122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.23009065786997476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.36797332763671875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.2246133287747701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.2211946646372477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.1902880072593689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.18835733334223428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.17697600523630777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.19223467508951822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.19205333789189658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.18301333983739218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.19683732589085898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.19473065932591757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.18592000007629395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.2042400042215983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.2028533418973287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.19614400466283163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.12809600432713827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.12451199690500896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.1225333313147227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.1076853374640147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.1076800028483073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.09604799747467041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.1067519982655843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.10011200110117595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.10971732934315999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.1097866694132487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.10297600428263347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.1135093371073405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.11297067006429036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.10795733332633972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.07168533404668172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.07274666428565979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.06057066718737284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.06126933296521505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.06142933170000712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.0621066689491272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.06424533327420552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.05922666688760122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.04450666904449463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.04201066493988037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.04205866654713949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.04236799975236257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.02802666773398717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.02905600021282832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.385535995165507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.38486401240030926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.3712586561838786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.39104000727335614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.3917173147201538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.37859201431274414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.39637335141499835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.39450132846832275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.3833706776301066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.40773868560791016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.4065813223520915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.40650665760040283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.23679467042287192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.23200533787409464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.23600532611211142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.20339733362197876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.20185067256291708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.19216533501942953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.20453866322835287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.20548266172409058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.19945067167282104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.20733332633972168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.20692267020543417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.19964800278345743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.21315733591715494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.21211200952529907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.20959466695785522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.12890666723251343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.12581866979599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.12808000048001608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.11136533816655476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.11148800452550252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.10242666800816853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.1125386655330658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.10543466607729594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.11313066879908244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.11172800262769063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.10912000139554341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.11732799808184306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.11665067076683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.11539733409881592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.07286400099595387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.0757173349459966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.06471999982992808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.06447466711203258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.06515733400980632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.06494399905204773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.06583466629187266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.06133866806825002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.042805333932240806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.04115733255942663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.036517334481080375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.03057066599527995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.02805333336194356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.022101332743962605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.2558773358662923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.25522667169570923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.2390986680984497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.25867732365926105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.25802133480707806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.24621333678563437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.25996800263722736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.26047466198603314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.2476266622543335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.2664159933725993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.26583999395370483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.2581920027732849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.1546346644560496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.15246400237083435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.15040000279744467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.1367680033047994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.13759467005729675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.12639466921488443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.13707733154296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.13777599732081094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.1296266714731852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.13847466309865317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.13894400000572205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.13353600104649863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.14225066701571146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.14082666238149008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.13955199718475342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.08593599994977315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.0846026639143626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.08661333719889323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.07681066791216533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.06956799825032552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.0767146646976471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.07843199868996938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.07076799869537354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.07835199932257335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.07788800199826558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.07002133131027222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.07435200115044911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.04865066707134247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.04869333406289419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04791999856630961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.043023998538653054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.04740266501903534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.047509332497914634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.04304533203442892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.03302400062481562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.029071999092896778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.03234133372704188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.19125332434972128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.19154665867487589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.17738133668899536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.1913813352584839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.19213332732518515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.18121065696080527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.19216533501942953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.19124799966812134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.18347734212875366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.19529066483179727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.19543999433517456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.1900320053100586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.11215466260910034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.1106666624546051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.11153067151705424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.10359467069307964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.10355200370152791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.10373333096504211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.09523733456929524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.10478933652242024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.10346666971842448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.10557333628336589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.10500267148017883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.09897067149480183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.06313066681226094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.060080001751581825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.055685331424077354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.06046399970849355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.06078400214513143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.05717866619427999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.06154133379459381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.06182933350404104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.05658133327960968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.037263999382654824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.028629332780838013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.1564533313115438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.15583466490109762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.14761066436767578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.15787200133005777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.15683199961980185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.1507253348827362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.1569546659787496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.15678399801254272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.15007999539375305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.1588533322016398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.15692266821861267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.1520960032939911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.08798933029174805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.08798399567604065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.08637866377830505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.08706667025883992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.08686400453249614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.08715732892354329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.08268266419569652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.08687466382980347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.08680533369382222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.08667199810345967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.08257600168387096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.05227733155091604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.052426666021347046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.051114668448766075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.05017066498597463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.05093866586685181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.051488002141316734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.04844800134499868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.05249066650867462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.05150400102138519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.03389866650104523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.03489600121974945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.024703999360402424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.020655999581019085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.01781333362062772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,7.014213562011719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,6.6081116994222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,5.43617057800293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,6.4177811940511065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,6.891408284505208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,5.492944081624349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,7.025893529256185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,6.848880132039388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,5.526293436686198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,7.004543940226237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,6.826143900553386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,5.5287520090738935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,3.317808151245117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,3.3492746353149414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,2.914560000101725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,3.2347145080566406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,3.372730573018392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,3.0238240559895835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,3.2122294108072915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,3.51584529876709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,2.8086134592692056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,3.3080479303995767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,3.4053173065185547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,2.8209546407063804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,3.2692480087280273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,3.2758665084838867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,2.8724800745646157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,1.7521920204162598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,1.7861812909444172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,1.5507787068684895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,1.710800011952718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,1.780394713083903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,1.511525313059489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,1.7088319460550945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,1.7837440172831218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,1.5651200612386067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,1.7192479769388835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,1.7376160621643066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,1.5498186747233074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,1.7245012919108074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,1.734282652537028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,1.6774667104085286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,0.9764586289723715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,0.9858400026957194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,0.898197333017985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,0.9566880067189535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,0.9469546476999918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,0.8522613048553467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,0.9502826531728109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,0.953775962193807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,0.8532373110453287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,0.9554186662038168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,0.9628799756368002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,0.8595786889394125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,0.9615893363952637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,0.9666293462117513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,0.8622666994730631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,3.73140811920166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,3.996522585550944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,3.260767936706543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,3.856186548868815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,4.0330400466918945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,3.2869014739990234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.48187732696533203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,4.003669420878093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,4.036506652832031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,3.3067038853963218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,4.161493301391602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,3.9306774139404297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,2.0766560236612954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,3.341925303141276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,2.008549372355143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,1.7794240315755208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,2.069200038909912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,2.0369226137797036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,1.7902132670084636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,1.9455520311991374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,1.9064319928487141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,1.7021439870198567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,2.003866672515869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,1.9514187177022297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,1.8624213536580403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,1.9966559410095215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,1.960085391998291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,1.8179680506388347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,1.107957363128662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,1.104474703470866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,0.9696373144785563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,1.0409759680430095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,1.0387946764628093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,0.9295893510182699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,1.061205307642619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,1.045962651570638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,0.9433066844940186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,1.0514933268229167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,1.068399985631307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,0.9382879734039307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,1.0591946442921956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,1.0557973384857178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,0.9490506649017334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,0.6156746546427408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,0.6242613395055135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.5627413193384806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,0.5966506799062093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,0.5978453159332275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.5533226728439331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,0.6005546649297079
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,0.6075040102005005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.5431840022404989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,0.5959733327229818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,0.6071626742680868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.5478613376617432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,0.6101919809977213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,0.6124853293100992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.5531626542409261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,2.6855945587158203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,2.7063681284586587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.5125919977823893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,2.373258590698242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,2.757136027018229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,2.7307732899983725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,2.3834080696105957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,2.7743094762166343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,2.96120548248291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,2.3987679481506348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,2.913109461466471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,3.258768081665039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,2.4426933924357095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,1.5210240681966145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,1.513055960337321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,1.3173866271972656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,1.4278292655944824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,1.5239839553833008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,1.2489120165507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,1.4118186632792156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,1.5971573193868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,1.25326935450236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,1.433359940846761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,1.4712533950805664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,1.262437343597412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,1.4721546173095703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,0.8183200359344482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,1.2820320129394531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,1.6752586364746094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,0.7273759841918945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,0.826474666595459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,0.9026613235473633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,0.7148959636688232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,0.7720906734466553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,0.8083413441975912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,0.6956213315327963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,0.7729012966156006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,0.8148000240325928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,0.7817227045694987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,0.6976266702016195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,0.8001653353373209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,0.7092906634012858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,0.7898080348968506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.4771253267923991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.43166931470235187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.47244266668955487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.4527626832326253
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.4505866765975952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.4092959960301717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.4582506815592448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.4538880189259847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.41265066464742023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.45998934904734295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.45713067054748535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.4167519807815552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.4699999888737996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.46266667048136395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.4231040080388387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,3.7031787236531577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,3.6923678716023765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,3.449359893798828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,3.75435733795166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,3.1911306381225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,3.5938507715861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,3.5810667673746743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,3.7096001307169595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,3.5354932149251304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,3.790773391723633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,1.9106666247049968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,3.2824586232503257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,4.01365852355957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,1.934773286183675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,1.7339785893758137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,2.137488047281901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,1.9490346908569336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,1.6238932609558105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,1.9077599843343098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,1.6328266461690266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,1.8405920664469402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,1.9805760383605957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,1.8666507403055828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,1.7916480700174968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,2.0673813819885254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,1.6792747179667156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,2.000175952911377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,1.1229440371195476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,1.0323413213094075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,0.9203306833902994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,0.9837600390116373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,0.9733066558837891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,0.8681546847025553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,0.9775253136952718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,0.8718612988789877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,0.9716800053914388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,0.9865386486053467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,0.8775200049082438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,0.9867093563079834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,0.9902506669362386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,0.8926346302032471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,0.9961493015289307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,0.5690986712773641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,0.5725760062535604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.5178399880727133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,0.5405493179957072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.4889440139134725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,0.5376053253809611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,0.5408480167388916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,0.5412693421045939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.4909813404083252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,0.549893339474996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.49462934335072833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,0.546837329864502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,0.5592853228251139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.5040640036265055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,0.5580853223800659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.3456053336461385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.34773866335550946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.3282933235168457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.32499200105667114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.29795199632644653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.32524800300598145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.3293813268343608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.29791466395060223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.33056533336639404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.3272693355878194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.30169065793355304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.3327999909718831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.33745598793029785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.30621333916982013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,2.196261405944824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,2.185258706410726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,2.2274133364359536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,2.190453370412191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,2.219114621480306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,2.061456044514974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,2.292960007985433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,2.2222773234049478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,2.257754643758138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,2.321338653564453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,2.3157973289489746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,2.2484639485677085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,1.323525349299113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,1.2843999862670898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,1.1145280202229817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,1.1324480374654133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,1.1393120288848877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,1.0170186360677083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,1.1462986469268799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,1.1412426630655925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,1.0192906856536865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,1.1488746802012126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,1.1564373175303142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,1.0383893648783367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,1.1790506839752197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,1.1762080192565918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,0.6458826859792074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,1.0554186503092449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,0.6556479930877686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,0.591264009475708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,0.610586682955424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,0.609989325205485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.5520266691843668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,0.6186986764272054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,0.6140960057576498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.5556266705195109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,0.6222826639811198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,0.6236000061035156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.5596906741460165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,0.6473226547241211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,0.6406346559524536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.5700159867604574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.37518401940663654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.37404266993204754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.34113065401713055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.34595199426015216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.35151465733846027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.3174720009167989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.3492746750513713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.34942932923634845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.3200693329175313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.35518932342529297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.3579946756362915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.3240533272425334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.36556267738342285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.3628053267796834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.32913599411646527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.2285333275794983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.23164800802866617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.21516799926757812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.21792000532150269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.21670933564503989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.2014240026473999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.21559999386469522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.22009599208831787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.20132267475128174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.22027732928593954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.21825067202250162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.20221332708994547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.2203999956448873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.22296533981959024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.20677334070205688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,2.2569120724995932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,2.2416213353474936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,2.1847519874572754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,2.284026622772217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,2.2791892687479653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,2.0343626340230307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,2.3648053805033364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,2.284730593363444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,2.0953173637390137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,2.3389600118001304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,2.3621066411336265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,2.219535986582438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,1.3158453305562336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,1.2743360201517742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,1.1329013506571453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,1.1430933475494385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,1.15120530128479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,1.0279839833577473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,1.1521173318227131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,1.1551093260447185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,1.0334346294403076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,1.1621599992116292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,1.170357306798299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,1.0484373569488525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,1.194602648417155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,1.2032000223795574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,1.0858133633931477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,0.6542559862136841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,0.6701172987620035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.5997653404871622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,0.609007994333903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,0.6060106754302979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.5487786531448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,0.6061333417892456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,0.608730673789978
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.5506399869918823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,0.613589326540629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,0.618506669998169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.5550666650136312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,0.6271946827570597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,0.6353386640548706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.5714720090230306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.3674079974492391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.36715201536814374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.33267732461293537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.3316426674524943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.3365226586659749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.3038133382797241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.3399306535720825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.33504533767700195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.30638933181762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.338703989982605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.3458720048268636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.31012266874313354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.3496319850285848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.3513706525166829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.32024532556533813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.21339199940363565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.2156426707903544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.19804799556732178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.19542400042215982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.19543466965357462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.17919466892878214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.19673067331314087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.1970240076382955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.18128534158070883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.19732266664505005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.1986080010732015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.18515199422836304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.20410666863123575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.2062079906463623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.19141334295272827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.13594133655230203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.138154665629069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.13158399860064188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.13275733590126038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.1349013348420461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.12366933623949687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.13513066371281943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.13172800342241922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.12479466199874878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.13618133465449014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.12389333049456279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.13489066561063132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.1351626714070638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.12574932972590128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,1.4602559407552083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,1.4406026204427083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,1.29967466990153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,1.4496480623881023
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,1.476261297861735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,1.3538613319396973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,1.4679519335428874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,1.4751200675964355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,1.422426700592041
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,1.5130720138549805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,1.5275519688924153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,1.383834679921468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,0.8326826890309652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,0.8314133485158285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,0.7518986860911051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,0.740991989771525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,0.7570986747741699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,0.6696320374806722
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,0.748090664545695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,0.7493759791056315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,0.6752800146738688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,0.7620533307393392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,0.7731413046518961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,0.687829335530599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,0.7799572944641113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,0.788581371307373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,0.7098613580067953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.43486932913462323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.446453332901001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.40227198600769043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.3999199867248535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.39897600809733075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.3617493311564128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.39954133828481037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.40859735012054443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.3647199869155884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.4150720040003459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.40855467319488525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.37136534849802655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.4166826804478963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.4280746777852376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.3826933304468791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.2479733427365621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.2500266631444295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.228383998076121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.22073066234588623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.2230559984842936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.20337067047754923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.22463999191919962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.22420267264048258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.20863467454910278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.22762133677800497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.21040000518163046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.23445866505304971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.23603200912475586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.21790399154027304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.14613333344459534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.149125337600708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.1383680005868276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.13421866297721863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.13432000080744425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.12205333511034648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.13449600338935852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.13482133547465006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.12522133191426596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.13552000125249228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.13587733109792074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.12643200159072876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.13953600327173868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.14063466588656107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.1330400009950002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.09528000156084697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.0969599982102712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.09103999535242717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.09360532959302266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.09319466352462769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.08879466851552327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.09351999560991923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.09339200456937154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.08786666393280029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.09444266557693481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.09483733773231506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.08875733613967896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.09504533807436626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,1.5811467170715332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,1.4250772794087727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,1.5750080744425456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,1.6162452697753906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,1.6003306706746419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,1.4463946024576824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,1.6181707382202148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,1.6910400390625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,1.4803519248962402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,1.7467199961344402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,1.706709384918213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,0.9012853304545084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,1.5500106811523438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,0.9156320095062256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,0.8333973089853922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,0.8067999680836996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,0.8138826688130697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,0.7309813499450684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,0.8124266465504965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,0.8143893082936605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,0.7366933027903239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,0.8251252969106039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,0.8390133380889893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,0.7523626486460367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,0.8685440222422282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,0.8696426550547282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,0.7868853410085043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.47129066785176593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.48892800013224286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.4385226567586263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.4283626476923625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.42692800362904865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.3854080041249593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.4262559811274211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.43516798814137775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.3874613444010417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.4377439816792806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.43813331921895343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.3946400086085002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.44977064927419025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.45907731850941974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.4134133259455363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.25843199094136554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.2622186740239461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.24062933524449667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.22868265708287558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.2312320073445638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.2133493423461914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.23381332556406656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.23360000054041544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.21482133865356445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.2376586596171061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.24064532915751138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.21889599164326987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.24861866235733032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.24754667282104492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.22909333308537802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.14820266763369241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.15228266517321268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.1283519963423411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.12984533111254373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.11896000305811565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.13036800424257913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.13202133774757385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.1237493356068929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.13369599978129068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.13502933581670126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.12573333581288657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.13926399747530618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.1402239998181661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.13209066788355509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.08996267120043437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.09198932846387227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.08704533179601033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.0800853321949641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.08678932984670003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.08644266923268636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.08705066641171773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.08925867080688477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.08284266789754231
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.056287998954455055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.062277331948280334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.05938666562239329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,1.0737813313802083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,1.0744586785634358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,0.9675306479136149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,1.0827360153198242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,1.097861369450887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,0.9840106964111328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.1107199986775715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.1063573360443115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,1.0010240077972412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.1569493611653645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.1659786701202393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,0.6295946836471558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,1.0504746437072754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,0.6408426761627197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.5810186862945557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.5564053455988566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.5525973240534464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.50054931640625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.5555733442306519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.5656853516896566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.5065973202387491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.56877334912618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.570464015007019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.5177013476689657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.5859946807225546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.6007893482844034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.5386613210042318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.3296266595522563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.3349226713180542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.3089333375295003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.29294933875401813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.2961440086364746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.2677706678708394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.2967946728070577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.2980960011482239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.27049599091211957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.3002293308575948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.30473599831263226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.27531200647354126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.31325334310531616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.3151893417040507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.28759467601776123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.18209600448608398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.18757333358128866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.1730239987373352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.1567626694838206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.15827199816703796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.14983999729156494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.1609119971593221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.16022933522860208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.1504853367805481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.16909333070119223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.16528000434239706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.15437333782513937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.1721280018488566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.17587200800577799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.1616106629371643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.10788800319035848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.10896000266075134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.10362133383750916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.09496000409126282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.08658132950464885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.09400000174840291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.09478933612505595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.08870399991671245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.09512533744176228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.09744532903035481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.09919466574986775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.10083199540774028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.09590400258700053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.06705066561698914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.06597866614659627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.06629333396752675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.06474666794141133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.06463466584682465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.06658133367697398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.06659733255704244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.06214933097362518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.06660800178845723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.06177600224812826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.051674668987592064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.050288001696268715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.050186668833096824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.1389706929524739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,1.0918933550516765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.1304480234781902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.1601866881052654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.155354658762614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.1032000382741292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,1.2190399964650471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.213104009628296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.3216906388600667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,1.2234506607055664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,1.205466667811076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,0.6652533213297526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.1926240126291912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,0.6635520060857137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,0.6548639933268229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.5866560141245524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.5835520029067993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.559061328570048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.5976959864298502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.5915146668752035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.5620213349660238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,0.6161813338597616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.6143946647644043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.6561226844787598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,0.6278773148854574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,0.6159893274307251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.6047573486963908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.34623467922210693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.34807467460632324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.3415040175120036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.3102399905522664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.30538666248321533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.28969599803288776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.31192533175150555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.3131519953409831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.2920373280843099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.32331732908884686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.3208693265914917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.3253013292948405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.32843732833862305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.3225226600964864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.31329067548116046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.18854933977127075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.18717332681020102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.18387200435002646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.16595199704170227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.16590933005015054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.1544426679611206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.16956265767415366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.16888533035914102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.15846400459607443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.1755680044492086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.1756053368250529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.17179733514785767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.17993066708246866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.17594132820765176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.16870933771133423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.10807466506958008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.10739733775456746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.1058026651541392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.09294933080673218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.09310400485992432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.08877333005269368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.09513066212336223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.09495466947555542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.0883679986000061
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.09754133224487305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.0995306670665741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.09525332848230998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.10460799932479858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.10008533795674641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.10078400373458862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.06363733112812042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.05921066800753275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.05421866476535797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.05994666616121928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.058490668733914696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.060592000683148704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.03796799977620443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.038890667259693146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.032431999842325844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,0.9902613162994385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,0.9732906818389893
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,0.9432533582051595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,0.9958399931589762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,0.9884959856669108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,0.9558506806691488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,1.060810645421346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,1.047445297241211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.165285348892212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,1.0500799814860027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,1.034549315770467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.068282683690389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.5866826772689819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.5797866582870483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.5790826479593912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.5043306748072306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.5001013278961182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.48144535223642987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.5115360021591187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.5079840024312338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.4864533344904582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.550442655881246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.5229066610336304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.5803786516189575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.5389866828918457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.5316426753997803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.5304906765619913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.3060693343480428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.3012479941050212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.29999999205271405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.2641493280728658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.2627039949099223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.24948267141977945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.2688000003496806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.26662399371465045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.25299733877182007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.2784586747487386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.2770133415857951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.2855306665102641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.2850293318430583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.2794559995333354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.2730986674626668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.16292267044385275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.1625386675198873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.16218133767445883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.14245333274205527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.14085867007573447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.13367467125256857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.14403200149536133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.13661866386731467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.15092800060908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.15056533614794412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.1520693302154541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.15437333782513937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.1463466684023539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.09482666850090027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.09385066231091817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.09803199768066406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.08360532919565837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.07454399764537811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.08180266618728638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.08140266438325246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.0773226668437322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.08597333232561748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.08540266752243042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.08330666522185008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.08865599830945332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.08785600463549297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.08541333675384521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.054527997970581055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.05495999753475189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.050437331199645996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.03505599995454153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.035216001172860466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.02402666707833608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.4546080032984416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.4477599859237671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.4391466776529948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.46847466627756756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.4633066654205322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.4457013209660848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.4931093454360962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.4870133399963379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.5384906530380249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.49961598714192706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.49005333582560223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.48765333493550617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.2823573350906372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.2762986620267232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.2813706596692403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.23849600553512573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.23598933219909668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.22922666867574057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.2461013396581014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.24351465702056885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.23447465896606445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.2570613423983256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.2549440066019694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.2643466591835022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.26150933901468915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.25748799244562787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.25321600834528607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.15293332934379578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.15244266390800476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.1508639951546987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.12760000427563986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.12745599945386252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.12247999509175618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.1323466698328654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.13008532921473184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.12640000383059183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.13853333393732706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.13778133193651834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.14084800084431967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.14215999841690063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.1400266687075297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.13662399848302206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.08920533458391826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.08731200297673543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.08719467123349507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.07274666428565979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.07348266740640004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.07463466624418895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.07794133325417836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.07709333300590515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.07957866787910461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.07858666777610779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.04858666658401489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.04394666850566864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.04604266583919525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.030794667700926464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.25307732820510864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.2508959968884786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.2478933334350586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.2584000031153361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.25280000766118366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.24937599897384644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.2752959926923116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.2686613400777181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.2780906756718953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.2765653332074483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.26944533983866376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.2703146735827128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.15870400269826254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.15590932965278625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.15742400288581848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.13517866532007852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.1339413324991862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.132042666276296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.13924266894658408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.13707733154296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.13377599914868674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.1460479994614919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.1443839967250824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.1471680005391439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.1478559970855713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.14640532930692038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.14492266376813254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.08904533584912618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.08835732936859131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.08983467022577922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.0766133318344752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.07263466715812683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.07594133416811626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.07473066449165344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.07157866656780243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.08020799855391185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.08303466439247131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.0830026666323344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.08061866462230682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.04608533283074697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.043247997760772705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.04619200030962626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04442666471004486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.048858667413393654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.046575998266537987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.17223467429478964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.17310933272043863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.16456533471743265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.17524800697962442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.17325866222381592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.16666666666666666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.17941333850224814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.18289599816004434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.18182400862375894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.18136000633239746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.18198400735855103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.1771999994913737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.10668800274531047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.10668800274531047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.1067519982655843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.09702933828035991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.09719467163085938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.08924266695976257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.10123733679453532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.10063466429710388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.10107733805974324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.10124799609184265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.09967999656995137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.05970133344332377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.05016533533732096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.057722667853037514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.05736533304055532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.03584533433119456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03925333420435587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.13260799646377563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.13426666458447775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.12570666273434958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.13412266969680786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.13196800152460733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.12548266847928366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.1379200021425883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.13793599605560303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.13272533814112344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.13781866431236267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.1379146675268809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.1341333289941152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.07902400195598602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.07448533177375793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.0747626672188441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.0685280015071233
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.07478400071461995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.0703306645154953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.0744053324063619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.07260799904664357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.07638399799664815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.0768693337837855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.07433066765467326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.04755199948946635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.04462933540344238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.0465280016263326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.029877332349618275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.06234666705131531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.10973333319028218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.10611733794212341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.11243733763694763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.11192533373832703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.10610666871070862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.11342933773994446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.11182933052380879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.10774933298428853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.11334932843844096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.11180266737937927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.10962667067845662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.06679466863473256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.06433600187301636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.06424533327420552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.0644160012404124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.06479466458161671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.06328000128269196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.04038399954636892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.02888533224662145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,4.971701304117839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,5.631664276123047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,4.130965232849121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,5.511450449625651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,5.388800303141276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,4.1271359125773115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,5.587952295939128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,5.370522816975911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,4.226240158081055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,2.5261279741923013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,3.0257867177327475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,2.482463995615641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,2.4544533093770347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,2.5574773152669272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,2.408506711324056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,2.465712070465088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,2.4727892875671387
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,2.321296056111654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,2.47763729095459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,2.63266658782959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,2.233919938405355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,1.3450345993041992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,1.3593759536743164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,1.3339999516805012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,1.3118826548258464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,1.3124372959136963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,1.1626293659210205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,1.3262720108032227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,1.3584693272908528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,1.169584035873413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,1.3197920322418213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,1.3802132606506348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,1.2010186513264973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,0.7721920013427734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,0.7786773045857748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,0.6934506893157959
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,0.7451573212941488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,0.7489813168843588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,0.6712640126546224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,0.7503680388132731
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,0.7520693143208822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,0.676800012588501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,0.7556479771931967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,0.7721227010091146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,0.6794506708780924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,3.0169013341267905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,3.1162773768107095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,2.4844640096028647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,2.8624960581461587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,3.1228745778401694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,2.6168905893961587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,2.8742345174153647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,3.008783976236979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,2.6564106941223145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,1.5347414016723633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,1.549898624420166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,1.3648212750752766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,1.487280050913493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,1.4846080144246419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,1.3085813522338867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,1.4862186113993328
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,1.5214559237162273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,1.3509333928426106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,1.5014932950337727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,1.5710186958312988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,1.3244746526082356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,0.8565013408660889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,0.8462453683217367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,0.7545173168182373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,0.8048906326293945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,0.810640017191569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,0.7294452985127767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,0.8306399981180826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,0.8238879839579264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,0.7753760019938151
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,0.8560960292816162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,0.821669340133667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,0.7346293131510416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.4960373242696126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.5002773205439249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.4500693480173747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.4804533322652181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.48206400871276855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.4312106768290202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.47654934724171955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.4756213426589966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.43274664878845215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.4808479944864909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.48999468485514325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.4391520023345947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,2.0638559659322104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,2.068448066711426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,1.8351945877075195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,2.08351469039917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,2.1624266306559243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,2.0829760233561196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,2.0967839558919272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,2.2374614079793296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,1.8502346674601238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,1.1349973678588867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,1.1699466705322266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,1.0362079938252766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,1.0880426565806072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,1.0942560036977131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,0.9640373388926188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,1.0922613143920898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,1.1200426419576008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,0.9969120025634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,1.1092693010965984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,1.108575979868571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,0.9780053297678629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,0.6286826531092325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,0.6350826819737753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.5695786476135254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,0.6031146844228109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,0.6026613314946493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.5429066816965739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,0.6070346832275391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,0.6088000138600668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.5476426680882772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,0.6241759856541952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,0.6158453226089478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.5519733428955078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.37726934750874835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.3831733465194702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.34647464752197266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.36472535133361816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.3651573260625203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.3286506732304891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.36554133892059326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.3654559850692749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.33083732922871906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.36722131570180255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.3723359902699788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.3355199893315633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,2.736858685811361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,2.9200960795084634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,2.5224000612894693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,2.7569545110066733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,3.0663038889567056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,2.6409494082132974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,2.872565269470215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,2.8348159790039062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,2.8072373072306314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,1.508394718170166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,1.498751958211263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,1.3431359926859539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,1.4215893745422363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,1.4877066612243652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,1.286736011505127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,1.4319252967834473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,1.4877546628316243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,1.2793546517690022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,1.444287935892741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,1.4473333358764648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,1.2958186467488606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,0.7915093104044596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,0.7984373569488525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,0.7133493423461914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,0.7544799645741781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,0.7573760350545248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,0.6740640004475912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,0.7600586414337158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,0.7614080111185709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,0.6781120300292969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,0.7669173081715902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,0.7694719632466634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,0.6852640310923258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.4484479824701945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.45284799734751385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.40877334276835126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.43231467405955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.4325973192850749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.3874560197194417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.4381066560745239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.43721600373586017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.38995198408762616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.44121066729227704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.44274131457010907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.3944213390350342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.2784693241119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.28148265679677326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.25511467456817627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.2640320062637329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.26551999648412067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.24219733476638794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.26445333162943524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.26639999945958454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.24236800273259482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.2665119965871175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.2664693395296733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.24466667572657266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,1.6998826662699382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,1.7687999407450359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,1.654853343963623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,1.9813013076782227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,1.7536533673604329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,1.588714599609375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,1.7393919626871746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,1.8282666206359863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,1.6497119267781575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,0.9349706967671713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,0.9473013083140055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,0.8461546897888184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,0.8847946325937907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,0.8864533106486002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,0.7973546981811523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,0.8844373226165771
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,0.8906400203704834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,0.7910133202870687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,0.8985280195871989
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,0.9033119678497314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,0.8074613412221273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.5091520150502523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,0.5139893293380737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.46272532145182294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.47974932193756104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.48129598299662274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.43299198150634766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.48527467250823975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.4856746594111125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.4350506862004598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.48972264925638836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.49302399158477783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.44114665190378827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.297487994035085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.30105600754419964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.27141332626342773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.28173865874608356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.28223999341328937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.25128533442815143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.28009066979090375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.28148800134658813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.25640533367792767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.2895413239796956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.28920533259709674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.2606559991836548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.19196800390879312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.19329599539438883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.17543999354044595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.18487467368443808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.1853920022646586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.16869332393010458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.18583466609319052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.18743467330932617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.16941867272059122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.1867520014444987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.18689066171646118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.1688800056775411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,1.7446506818135579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,1.8451840082804363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,1.5479893684387207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,1.749029318491618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,1.7859093348185222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,1.5604160626729329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,1.7760213216145833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,1.7970666885375977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,1.657541275024414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,0.9687200387318929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,0.9807946681976318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,0.8733920256296793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,0.8940853277842203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,0.8960800170898438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,0.7956746419270834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,0.9007413387298584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,0.9038026332855225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,0.8004106680552164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,0.9120960235595703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,0.9185492992401123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,0.8150453567504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.5146026611328125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.5288800001144409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.467904011408488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.482256015141805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.4848533471425374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.42711468537648517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.4818506638209025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.4824533462524414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.4288586775461833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.48944000403086346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.4931306838989258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.43832000096638996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.28993600606918335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.29340267181396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.26502933104832965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.2678080002466838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.2693600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.24173865715662637
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.27082133293151855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.2717546621958415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.24408533175786337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.277130663394928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.2776799996693929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.24923733870188394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.1755946675936381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.17572800318400064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.16079466541608176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.16011200348536173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.16074132919311523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.1458186705907186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.16194666425387064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.16251200437545776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.14560533563296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.16309332847595215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.16404799620310465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.15053332845369974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.11373866597811381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.11461866895357768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.1055626670519511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.1109226644039154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.11101333300272624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.10325866937637329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.1116373340288798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.11193600296974182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.10345066587130229
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,1.1357759634653728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,1.1349866390228271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,1.0055466492970784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,1.1396746635437012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,1.1450293064117432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,1.01638396581014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,1.1576266288757324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,1.1646560033162434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,1.049232006072998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,0.6472959915796915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,0.6594506502151489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.5815466642379761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,0.588640014330546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,0.5958880186080933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.5254133145014445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,0.5917866627375284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,0.5942506790161133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.5284159978230795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,0.6091733376185099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,0.6131893396377563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.5396053393681844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.3455626567204793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.35172800223032635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.3159839908281962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.3169066707293193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.3216106692949931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.28697067499160767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.32469334204991657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.3257546623547872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.28818132479985553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.32817065715789795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.33239465951919556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.295141339302063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.20067199071248373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.20344533522923788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.18306666612625122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.17896000544230142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.18127467234929404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.16290666659673056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.18307733535766602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.183786670366923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.16594666242599487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.18531199296315512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.1874879995981852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.17122133572896323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.12248532970746358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.12336533268292744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.11512533823649089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.11423466602961223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.11524266997973125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.11564266681671143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.11583999792734782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.31327466169993085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.11885866522789001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.1181599994500478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.10723732908566792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.07999999821186066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.07036266724268596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.07681066791216533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.07643199960390727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.07686399916807811
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.07242133220036824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,1.2454666296641033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,1.2512906392415364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,1.1051519711812336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,1.2633439699808757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.1202186743418376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,1.2686986923217773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,1.2826560338338215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,1.294810692469279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,0.7118186950683594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.1447093486785889
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,0.7262933254241943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,0.6447679996490479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,0.6447519858678182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,0.6436853408813477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.5714079936345419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,0.644976019859314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,0.6519786516825358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.5751466751098633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,0.6597919861475626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,0.6648639837900797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.5908799966176351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.3747359911600749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.38286932309468585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.3421119848887126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.3416159947713216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.3419413169225057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.3036213318506877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.3430773417154948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.34705066680908203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.3067359924316406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.35129066308339435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.35255467891693115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.2080693244934082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.3150399923324585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.21159466107686362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.19217065970102945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.18241600195566812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.18874667088190714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.16662399967511496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.1898933251698812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.18749332427978516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.1727679967880249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.19272534052530924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.19594132900238037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.17679466803868613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.12223999698956807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.12324800093968709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.11473066608111064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.10821333527565002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.11040000120798747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.11058666308720906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.11101333300272624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.0992746651172638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.1123253305753072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.11314666271209717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.10191999872525533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.07821333408355713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.07410133381684621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.07645333309968312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.06819733480612437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.07646400233109792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.07611733178297679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.07043200234572093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.05427733560403188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.05644266804059347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.05198933184146881
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,0.8888053099314371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,0.8920693397521973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,0.7834933598836263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,0.899738629659017
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,0.8993013699849447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,0.791973352432251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,0.9168907006581625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,0.9131146272023519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.5077813466389974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,0.8105759620666504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.5221226612726847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.4583573341369629
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.45071999231974286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.4551146825154622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.39986133575439453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.4561493396759033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.4547413190205892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.4044320185979207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.4633866548538208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.47066132227579754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.4148906469345093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.2706506649653117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.2739199995994568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.24596800406773886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.2384799917538961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.24066666762034097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.2145973245302836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.24285866816838583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.2424266735712687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.2183039983113607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.24833599726359049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.25225599606831867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.2224000096321106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.15262400110562643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.15150933464368185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.14031466841697693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.12781866391499838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.13155200084050497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.11541866262753804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.12989866733551025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.13116266330083212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.12160533666610718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.13929599523544312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.12648533781369528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.08847999572753906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.08477333188056946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.0806826651096344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.08102400104204814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.08089600006739299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.08124800026416779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.08069866895675659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.0860746701558431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.05572799841562907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.052784000833829246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.05234666665395101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.04850666721661886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.05027199784914652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.047370667258898415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.044309332966804504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,0.9097867012023926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,0.9095199902852377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,0.8664320309956869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,0.9322720368703207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,0.9278879960378011
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,0.8910880088806152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,0.947322686513265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,0.9404213428497314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.5339626471201578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,0.9058453241984049
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.5238346656163534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.5106186469395956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.4694026708602905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.46860265731811523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.44498133659362793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.48040000597635907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.4772426684697469
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.45551466941833496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.4881226619084676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.48622934023539227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.46782398223876953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.28274667263031006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.27688000599543255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.269269327322642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.24572267134984335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.2453599969546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.23015999794006348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.25277866919835407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.25173866748809814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.2393760085105896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.25810132424036664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.2564000089963277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.24351465702056885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.15522133310635886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.15270400047302246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.1471573313077291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.13421333829561868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.13506133357683817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.12425600488980611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.13808000087738037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.1356160044670105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.13155200084050497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.14075199762980142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.13454399506251016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.14080533385276794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.09130133191744487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.08929600318272908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.08819733063379924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.08042666812737782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.08035733302434285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.07082666456699371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.07937600215276082
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.08067733546098073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.07464000085989635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.0566293348868688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.055248002211252846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.054832001527150474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.048245335618654885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.054799998799959816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.03925333420435587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.03755733370780945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.041050667564074196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.03902400036652883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.031957333286603294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03708266715208689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.03565333286921183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.03526933242877325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,0.7861653168996176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,0.7851680119832357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,0.7489973704020182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,0.8088160355885824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,0.7987093130747477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,0.7697599728902181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,0.8214026292165121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,0.8149920304616293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.47063998381296795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,0.7988426685333252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.45839468638102215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.45102401574452716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.4031360149383545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.4013599952061971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.3854080041249593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.41278398036956787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.41255998611450195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.3937813440958659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.423583984375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.4188426733016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.4030719995498657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.24914666016896567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.24318933486938477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.23778667052586874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.2113813360532125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.21075199047724405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.1993866761525472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.21818133195241293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.21757866938908896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.20869867006937662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.22340265909830728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.22197866439819336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.2119413415590922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.13682666420936584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.13357333342234293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.13145599762598673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.11758400003115337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.11496532956759135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.1184266706307729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.11959466338157654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.11382399996121724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.12177067001660664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.12126400073369344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.1174720029036204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.07967466612656911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.079434668024381
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.07829333345095317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.07002666592597961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.0691786656777064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.06922666728496552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.07039466500282288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.07256000240643819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.049498667319615684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.04608533283074697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.04197866717974345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.03269333392381668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.031093334158261616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.03254933406909307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.03294933338960012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.028890666862328846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.026901334524154663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.36633598804473877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.36561067899068195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.3553706804911296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.3743199904759725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.37305064996083576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.364901343981425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.38301865259806317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.37911466757456463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.3810826539993286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.22933334112167358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.22498132785161337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.22265599171320596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.1929439902305603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.19156267245610556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.18623467286427817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.20003734032313028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.19869865973790488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.19417067368825278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.20253332455952963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.19978666305541992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.19719467560450235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.12772799531618753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.12429333726565044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.12331199645996094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.10941867033640544
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.10798399647076924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.09908800323804219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.10985599954922994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.1111253301302592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.10739200313886006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.11296000083287557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.11284266908963521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.10891733566919963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.07144000132878621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.07156266768773396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.06281066437562306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.06367466847101848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.0544053316116333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.0625600020090739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.04635733366012573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.0424586683511734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.04404800136884054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.0444106658299764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.030000001192092896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.030026666820049286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.030960001051425934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.03088533381621043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.02696000039577484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.022245332598686218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.20797866582870483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.2053119937578837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.2015626629193624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.2137440045674642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.21227733294169107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.20732800165812174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.2157813310623169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.21398399273554483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.2109760046005249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.12990400195121765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.12755733728408813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.12981866796811423
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.11366400122642517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.11231999595959981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.10564266641934712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.11498666803042094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.11522133151690166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.1134986678759257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.11848533153533936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.11701333522796631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.11565333604812622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.07417066891988118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.07519466678301494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.06672533353169759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.06611733138561249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.058549334605534874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.06622399886449178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.06518933176994324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.06784533460934956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.06701333324114482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.06354666749636333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.04327466587225596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.04197866717974345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.042090664307276406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.042437334855397545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.03825066735347112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.029919999341169994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.030991998811562855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.030693332354227703
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.13987732927004495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.1386613349119822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.12896533807118735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.14176533619562784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.14192533493041992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.13636266191800436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.1442506710688273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.14309866229693094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.13877333203951517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.08653866251309712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07853333155314128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.07876266539096832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.07105599840482076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.07923733194669087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.07904533545176189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.07337066531181335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.08075200021266937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.07993066807587941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.050399998823801674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.049216002225875854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.0496373325586319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.03305600086847941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.10670933127403259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.10577600200970967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.09757333000500996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.1053013304869334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.10525866349538167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.09826667110125224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.10533333818117778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.10052266716957092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.06497600177923839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.062405332922935486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.06117866436640421
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.05649599929650625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.062447999914487205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.06275199850400288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.05859733124574026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.03881066789229711
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.041109333435694374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.04136000076929728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.029066666960716248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.08900800347328186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.08893866340319316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.089519997437795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.0831520011027654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.0899839997291565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.08933333555857341
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.08507200082143147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.054378668467203774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.05356266597906748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.05031999945640564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.050661335388819374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.03694933404525121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,3.6392641067504883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,3.679327964782715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,2.7798080444335938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,3.2828105290730796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,3.6768105824788413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,3.0625813802083335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,3.2992798487345376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,3.253328005472819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,3.605685234069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,1.7125546137491863
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,1.8361172676086426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,1.776528040568034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,1.656618595123291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,1.6685973803202312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,1.7898346583048503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,1.6793866157531738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,1.8821226755777996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,1.8106826146443684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,1.6924160321553547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,1.6872800191243489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,1.6923413276672363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,0.933082660039266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,0.9390933513641357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,0.8383306662241617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,0.9097546736399332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,0.9122560024261475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,0.8120106856028239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,0.9152373472849528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,0.9222186406453451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,0.8282240231831869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,0.9201813538869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,0.9428586959838867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,0.8258612950642904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,0.5536906719207764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,0.557914654413859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.5022933483123779
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,0.5462826490402222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,0.5481866598129272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.4846186637878418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,0.5468000173568726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,0.5461599826812744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.49010133743286133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,0.5528586705525717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,0.5531253417332967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.49411733945210773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,1.9164533615112305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,2.1225973765055337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,1.825178623199463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,1.9274346033732097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,1.9467627207438152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,1.7651306788126628
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,1.9432907104492188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,2.08515199025472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,1.7093067169189453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,1.044917345046997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,1.055023988087972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,0.9391146500905355
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,1.0333333015441895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,1.012880007425944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,0.9039466381072998
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,1.0284106731414795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,1.032960017522176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,0.9223626454671224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,1.0256106853485107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,1.0290453433990479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,0.9251466592152914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,0.6023146708806356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,0.5979040066401163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.5315359830856323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,0.5666133165359497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,0.567792018254598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.5127679904301962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,0.5765653451283773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,0.5726186831792196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.5174773136774699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,0.5898133516311646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,0.5813279946645101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.5231733322143555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.36397333939870197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.36505067348480225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.33032532533009845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.3510826826095581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.3500266472498576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.3182239929835002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.3527359962463379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.3521653413772583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.3166559934616089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.3529920180638631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.35572266578674316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.3229440053304036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,1.384112040201823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,1.4159785906473796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,1.2329013347625732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,1.4072906176249187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,1.394037405649821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,1.3162879943847656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,1.437178611755371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,1.434165318806966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,1.3176480134328206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,0.7890133062998453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,0.7970186869303385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,0.7010773022969564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,0.761029322942098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,0.7882773081461588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,0.6708532969156901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,0.7664000193277994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,0.7578986485799154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,0.6729599634806315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,0.7655093669891357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,0.7782346407572428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,0.6824586391448975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.447002649307251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.45314133167266846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.40437865257263184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.4275840123494466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.43372801939646405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.38765867551167804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.4360533157984416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.4368373155593872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.3914613326390584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.4423360029856364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.4447466532389323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.3959840138753255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.2845279971758525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.28570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.2566399971644084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.2738560040791829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.2744586666425069
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.24874132871627808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.2757440010706584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.27694400151570636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.2487786610921224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.27525333563486737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.2778720060984294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.25220266977945965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,1.8240319887797039
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,1.8428799311319988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,1.8031199773152669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,1.847274621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,1.854570706685384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.11137066284815471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,1.6363040606180828
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,1.8598079681396484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,1.8755946159362793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,0.9887359937032064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,1.6587519645690918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,1.0027519861857097
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,0.9024373690287272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,0.9439093271891276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,0.9573706785837809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,0.8531520366668701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,0.9624106884002686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,0.9778133233388265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,0.8581759929656982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,0.9826933542887369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,1.001535971959432
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,0.8739199638366699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,0.5454293489456177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,0.5565493504206339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.4962400197982788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,0.5167093276977539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,0.5314240058263143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.47490668296813965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,0.5363039970397949
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,0.5347893238067627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.4753653208414714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,0.539738655090332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,0.5355413357416788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.4809279839197795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.32019732395807904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.325162669022878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.294597327709198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.3073333303133647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.3101866642634074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.2775040070215861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.30949334303538006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.30901867151260376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.2795413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.3142079909642537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.31754134098688763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.28750399748484295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.2079360087712606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.20838934183120728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.19350934028625488
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.20109866062800089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.204858660697937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.1851146618525187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.20460800329844156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.20310932397842407
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.18544532855351767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.20382932821909586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.2069173256556193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.1872319976488749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,1.1555999914805095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,1.1466399828592937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,1.0160106817881267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,1.1539359887440999
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,1.1686773300170898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,1.029968023300171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,1.161237319310506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,1.1682666937510173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,1.0429920355478923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,0.6355520089467367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,0.6434346834818522
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.5797973473866781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,0.6152586539586385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,0.612613320350647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.5419413248697916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,0.6139893531799316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,0.6143146753311157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.5473440090815226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,0.6205600102742513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,0.6261706749598185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.5564159949620565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.35446401437123615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.3259200056393941
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.35980268319447833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.3364106814066569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.3057760000228882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.34299735228220624
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.3452106714248657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.31016000111897785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.3498293161392212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.35344000657399494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.3134079972902934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.21218132972717285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.21357866128285727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.19939732551574707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.20362132787704468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.20357867081960043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.1832480033238729
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.20417600870132446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.2071359952290853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.18522133429845175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.20695465803146362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.2079360087712606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.19056532780329385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.13822933038075766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.14014400045077005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.13010133306185404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.1360160013039907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.13544533650080362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.12602667013804117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.13451199730237326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.13587733109792074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.12762133280436197
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.1362826625506083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.12778133153915405
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,1.1782666842142742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,1.1631466547648113
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,1.0414079825083415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,1.172650655110677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,1.1778559684753418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,1.0567093690236409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,1.1983359654744465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,1.2221439679463704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,1.0781493186950684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,0.6617546478907267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,0.6648373206456503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.5919520060221354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,0.61190398534139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,0.6157279809316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.5421173175175985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,0.6092960039774576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,0.6113386551539103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.5490666627883911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,0.6186826626459757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,0.6248106559117635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.5624746481577555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.3601173162460327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.36272533734639484
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.3243253429730733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.3319946726163228
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.3325120011965434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.29786133766174316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.33795734246571857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.3411946694056193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.3031253417332967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.3437439997990926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.34648001194000244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.3083680073420207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.20507200558980307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.20825066169102988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.1906986633936564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.19037866592407227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.19066667556762695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.17072000106175741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.19416000445683798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.19562667608261108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.17546667655309042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.19813867410024008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.19947733481725058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.18097599347432455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.1267573336760203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.1299626628557841
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.12157866358757019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.12196266651153564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.12184000015258789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.1130400002002716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.12434132893880208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.12609066565831503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.11214400331179301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.1221386690934499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.1249066690603892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.11372266213099162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.08779733379681905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.08283199866612752
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.08334400256474812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.08304533362388611
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.07887466748555501
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.08461333314577739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.08508800466855367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,0.7701706886291504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,0.7654773394266764
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,0.6794453461964926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,0.7765813668568929
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,0.779525359471639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,0.6932373046875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,0.7910239696502686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,0.7980799674987793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,0.709658702214559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.43991466363271076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.44621864954630536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.3975840012232463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.4025599956512451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.40410133202870685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.35944533348083496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.4089973370234172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.41234131654103595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.3663040002187093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.41813866297403973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.420415997505188
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.3747626543045044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.24138667186101279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.24615466594696045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.22240533431371054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.2211573322614034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.22069867451985678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.2007840077082316
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.22596800327301025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.22765866915384927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.20588799317677817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.23270932833353677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.23345067103703818
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.21059733629226685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.14239999651908875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.14328533411026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.13145599762598673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.12814399600028992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.128330667813619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.11759466926256816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.13235200444857279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.13214932878812155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.11905599633852641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.13319999972979227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.13412266969680786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.12384532888730367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.08906666437784831
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.0905013382434845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.08267733454704285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.08525333801905315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.08718400200208028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.08730133374532063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.08010666569073994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.09326933821042378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.09327466289202373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.06768533090750377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.0682826687892278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.06634666522343953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.06665599842866261
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.06414933502674103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.0664160003264745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.06247466802597046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,0.8676853179931641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,0.8485173384348551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,0.7588160037994385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,0.8659253120422363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,0.8718400001525879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,0.7719893455505371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,0.8820052941640218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,0.8899839719136556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,0.7943893273671468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.4867146809895833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.49167466163635254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.4399679899215698
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.4448586702346802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.4373600085576375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.3928639888763428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.4460373322168986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.44933334986368817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.4008800188700358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.4582506815592448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.46187734603881836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.4140213330586751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.261189341545105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.26366400718688965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.23786665995915732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.23471466700236002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.23543467124303183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.21338133017222086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.23969600598017374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.24136000871658325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.21782932678858438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.24606400728225708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.2488159934679667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.2240053415298462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.14968533317248026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.13691733280817667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.12754666805267334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.12850133577982584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.11786133050918579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.12999999523162842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.13180800278981528
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.12354666988054912
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.13775466879208884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.13859200477600098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.12896533807118735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.08610666791598003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.08780266841252644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.08470400174458821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.08257600168387096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.08211733400821686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.08297599852085114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.08855467041333516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.07932800054550171
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.0580320010582606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.05420266588528951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.054373333851496376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.05235733091831207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.6048373381296793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.5959200064341227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.5488906701405843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.6174559990564982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.6025333404541016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.5399359862009684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.6166719992955526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.6196853319803873
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.5501386721928915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.34557334582010907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.3491199811299642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.31302932898203534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.30587200323740643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.30773866176605225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.276528000831604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.31224532922108966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.3141653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.2818719943364461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.3199146588643392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.32435200611750287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.28987733523050946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.18690133094787598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.1883359948794047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.17280532916386923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.16200000047683716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.16381866733233133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.15174933274586996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.16769067446390787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.16935465733210245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.1546346644560496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.1737066706021627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.1792693336804708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.1613759994506836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.10830400387446086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.1088106632232666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.10179733236630757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.09362133344014485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.09594133496284485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.08539199829101562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.09551999966303508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.0869813362757365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.09702400366465251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.09334400296211243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.06700266897678375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.06274666885534923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.05665599803129832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.04316799839337667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.6128319899241129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.5965119997660319
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.5917173226674398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.6152799924214681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.6105813185373942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.5830293496449789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,0.6625920136769613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,0.6374773184458414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.6452960173288981
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.35311468442281085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.34940266609191895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.34303466478983563
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.31568000713984173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.31300799051920575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.29891733328501385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.32126400868097943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.3189653356870015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.3020159999529521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.33533334732055664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.33159999052683514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.32237333059310913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.19134400288263956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.19092265764872232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.19214399655659994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.17305066188176474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.17231466372807822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.15938133001327515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.17309866348902384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.16225600242614746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.18127999703089395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.18000000715255737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.17353065808614096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.11102400223414104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.10756267110506694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.10454400380452473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.09523733456929524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.09514133135477702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.08845333258310954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.10140800476074219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.09948800007502238
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.10371733705202739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.10311999917030334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.10047466556231181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.06468800206979115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.06281599899133046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.05885333319505056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.059088001648585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.05666666726271311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.060565332571665444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.05509333312511444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.06173333525657654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.061573331554730736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.060533334811528526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.040048000713189445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.03988266736268997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.038736000657081604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.04271466533342997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.039274667700131737
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.029898665845394135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.5221759875615438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.5146880149841309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.49694931507110596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.5351999998092651
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.5283733208974203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.5071680148442587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.5695840120315552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.5616426865259806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.5679999987284342
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.3197653293609619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.30502933263778687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.30485866467158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.27454932530721027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.2712159951527913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.25804799795150757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.2797013322512309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.2776479919751485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.26321067412694293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.2927680015563965
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.29041600227355957
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.28486400842666626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.16689600547154745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.16421866416931152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.16336533427238464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.14781866470972696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.14722133676211038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.13959466417630514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.15100266536076865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.14987732966740927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.14082133769989014
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.1567359964052836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.15779733657836914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.15227199594179788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.09636800487836202
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.09357333183288574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.09277866284052531
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.08163199822107951
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.08164266745249431
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.07682666679223378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.07828799883524577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.08956799904505412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.08903466661771138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.08751466870307922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.05473066866397858
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.0544053316116333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.05013333261013031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.05188799897829691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.051674668987592064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.036202666660149894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.03229333211978277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.03306133300065994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.034048000971476235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.026698666314284008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.024864000578721363
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.023669332265853882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.23963199059168497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.2376799980799357
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.23697600762049356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.2463093400001526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.24206932385762533
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.24299200375874838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.26693334182103473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.2603626648585002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.26101867357889813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.15497066577275595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.15130133430163065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.1516693333784739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.12898133198420206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.12754133343696594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.12702932953834534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.1330773333708445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.13120533029238382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.12943999965985617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.14417066176732382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.14110400279362997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.14063466588656107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.09041600426038106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.08734933535257976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.07444799939791362
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.07196266452471416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.07702933251857758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.0751146674156189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.08255999783674876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.08085333307584126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.04853333532810211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.046309332052866616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.05031999945640564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.04215999941031138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.04608533283074697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.04460266729195913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.032245332996050514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.13980799913406372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.13778666655222574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.13737600048383078
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.1406719982624054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.14065600434939066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.13860266407330832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.14704533418019614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.14658133188883463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.14723733067512512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.0895253320535024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.08710400263468425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.08835732936859131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.07682666679223378
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.07674666742483775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.07473066449165344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.08261866867542267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.05026666820049286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.048384000857671104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.04786133269468943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.04654400050640106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.03314133236805598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.03194133440653483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.024298667907714844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.023957334458827972
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.02248000105222066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.09895466764767964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.09745599826176961
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.09086933732032776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.09840533137321472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.09756267070770264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.101583997408549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.10217066605885823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.09975467125574748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.05961599946022034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.059530665477116905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.056287998954455055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.05068266888459524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.055973331133524575
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.05834133426348368
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.07640533149242401
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.0707946668068568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.07660800218582153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.04517866671085358
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.04576533536116282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.030266667405764263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.02189333240191142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.06452799836794536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.06243733565012614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.06633066634337108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.041264000038305916
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03985599925120672
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,1.7123732566833496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,1.6828907330830891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,1.459829330444336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,1.701807975769043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,1.7004319826761882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,1.470128059387207
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,0.9315626621246338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,0.9360640048980713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,0.8195679982503256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,0.9125119845072428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,0.926736036936442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,0.7995626926422119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,0.9204373359680176
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,0.9118879636128744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,0.8039200305938721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,0.5378026564915975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,0.5426346858342489
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.48153066635131836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,0.5252853234608968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,0.525541345278422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.46570666631062824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,0.5325066645940145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,0.533408006032308
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.47357332706451416
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.34089068571726483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.3421066602071126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.3092053333918254
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.3431359926859538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.3438719908396403
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.30802132685979206
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.34428266684214276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.3351893424987793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.30421332518259686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,1.0492160320281982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,1.0246346791585286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,0.8972106774648031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,1.0322666962941487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,1.0326879819234211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,0.9066666762034098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,0.5815413395563761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,0.5877013206481934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.5200746854146322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,0.568336009979248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,0.5699306726455688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.5025440057118734
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,0.5732693274815878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,0.5751573244730631
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.5090986490249634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.3475786844889323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.3492533365885417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.31270400683085126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.3363413413365682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.33907198905944824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.3023573358853658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.3408373196919759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.3413120110829671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.3065920074780782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.2206666668256124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.22300267219543457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.20375466346740723
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.21901865800221762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.22023467222849527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.2017013430595398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.22034666935602823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.22047466039657593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.2029119928677877
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,0.7551893393198649
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,0.7595040003458658
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,0.6657119989395142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,0.7641386985778809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,0.7680959701538086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,0.6729919910430908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.44019198417663574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.445034662882487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.3945173422495524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.4259999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.42660268147786456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.3792693217595418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.430458664894104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.4336586793263753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.38384532928466797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.27101866404215497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.2714453339576721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.24486400683720908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.26148800055185956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.2637760043144226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.2342346707979838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.26522133747736615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.26562132438023883
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.23805866638819376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.16773333152135214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.169487992922465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.15439466635386148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.1670666734377543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.16756266355514526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.1525973379611969
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.16660267114639282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.16871466239293417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.15440000096956888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,1.009386698404948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,0.981503963470459
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,0.8575200239817301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,0.9881760279337565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,0.9968427022298177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,0.8708639939626058
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,0.549509326616923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,0.553978681564331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.48817066351572674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,0.527514656384786
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,0.5236106713612875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.4679093360900879
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,0.5348053375879923
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,0.53875199953715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.4748106797536214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.31761600573857623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.32054932912190753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.28599466880162555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.30197866757710773
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.30356266101201373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.271504004796346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.3097653388977051
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.31040533383687335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.27804799874623615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.19965867201487222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.20083200931549072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.17914666732152304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.19387733936309814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.19873599211374918
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.06166933476924896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.17724267641703287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.199178675810496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.19934932390848795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.17576533555984497
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.1269973317782084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.12971199552218118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.11990400155385335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.12798399726549783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.12800000111262003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.13617066542307535
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.1290773351987203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.12773332993189493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,0.6218239863713583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,0.6112639904022217
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.543008009592692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,0.6207733154296875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,0.6283573309580485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.5543946822484335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.3543786605199178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.36073601245880127
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.3188053369522095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.340282678604126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.33852799733479816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.3022293249766032
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.34327467282613117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.3476159969965617
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.30793599287668866
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.20962133010228476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.19118932882944742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.19598400592803955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.1982240080833435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.17762666940689087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.20202134052912393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.20286399126052856
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.18364266554514566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.12913599610328674
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.1304800013701121
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.12151466806729634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.1283199985822042
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.13457066814104715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.11887466907501221
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.13219733039538065
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.13314666350682577
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.11971732974052429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.10140267014503479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.10250666737556458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.09520533680915833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.09922666351000468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.09333333373069763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.10101866722106934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.10087999701499939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.09522666533788045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,0.6420586506525675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,0.6431146860122681
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.555738647778829
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,0.6416800022125244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,0.6426080067952474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.5712586641311646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.3595679998397827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.3633333444595337
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.32232000430425006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.3380800088246663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.33960533142089844
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.300656000773112
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.34494932492574054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.3469280004501343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.30833067496617633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.2039253314336141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.20678400993347168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.18717332681020102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.18986133734385172
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.19073599576950073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.17151999473571777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.19434134165445963
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.1955146590868632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.17798399925231934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.12365866700808208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.12294399738311768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.11540800333023071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.11793067057927449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.11793067057927449
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.1139359970887502
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.1269706686337789
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.12258666753768921
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.11173333724339803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.07825066645940144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.07983466486136119
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.07800533374150594
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.07342400153477986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.0783786674340566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.0743999977906545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.0727946658929189
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.07342933118343353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.07300800085067749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.07321600119272868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.06885333359241486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.42875198523203534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.4307626485824585
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.372869332631429
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.4302080074946086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.433077335357666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.38524266084035236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.24477332830429077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.24757866064707437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.23189866542816162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.2379680077234904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.23227733373641968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.20800000429153442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.2373653252919515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.23920534054438272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.21207465728123984
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.14104533195495605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.14155200123786926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.13184533516565958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.13172800342241922
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.13291733463605246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.11762133240699768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.13246933619181314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.13315199812253317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.12276800473531087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.08524266878763835
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.08735466996828715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.08108266691366832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.08490133285522461
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.07663466533025105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.08470400174458821
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.08710400263468425
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.06381333371003468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.06467199822266896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.060592000683148704
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.06192000210285187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.06371200084686279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.05840000013510386
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.055386667450269066
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.05973866581916809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.4894239902496338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.49321599801381427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.42798932393391925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.49056001504262287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.49349331855773926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.45468799273173016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.28018667300542194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.2813599904378255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.252895991007487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.2555253307024638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.2568639914194743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.22868265708287558
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.26528533299763996
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.2571199933687846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.2337813377380371
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.15161066253980002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.16029866536458334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.14574399590492249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.13927466670672098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.13460800051689148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.12549866239229837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.14314666390419006
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.1441920002301534
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.1328480045000712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.08706667025883992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.08887466788291931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.08370666702588399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.08230933547019958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.08212266862392426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.07421866556008656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.08299200236797333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.0848533312479655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.07789333164691925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.054757331808408104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.04860800007979075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.046538665890693665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.0484799991051356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.04885333279768626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.0483893354733785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.04849599798520406
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.04428799947102865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.044981335600217186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.042170668641726174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.32526934146881104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.3266719977060954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.2868373394012451
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.33295466502507526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.33680001894632977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.29713066418965656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.19154665867487589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.19597333669662476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.17683732509613037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.17441600561141968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.17511999607086182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.1569973329703013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.18013334274291992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.1813919941584269
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.16383467117945352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.10869333148002625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.10987733801205952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.1035146713256836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.09547733267148335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.0958026647567749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.09909866253534953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.10102933645248413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.09325866897900899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.06418133278687795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.06239999830722809
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.06246933341026306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.05610666672388712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.06211199859778086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.059018666545550026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.046015997727712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.04461866617202759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.03844800094763438
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.04050666590531667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.03933866570393244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.03797333439191183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.33370665709177655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.330186665058136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.3253440062204997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.3432106574376424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.33975998560587567
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.33322133620580036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.19536532958348593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.1918720006942749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.184063990910848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.1800853411356608
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.1721493403116862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.17495467265446982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.181386669476827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.18158932526906332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.17669866482416788
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.10773866375287373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.10667733351389568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.09576533238093059
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.09665066997210185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.09325866897900899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.10133866469065349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.10026133060455322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.10019200046857198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.06249066690603892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.0631573349237442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.06057600180308024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.054645334680875145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.059578667084376015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.04033066580692927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.0391893337170283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.03890133400758108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.03209066639343897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.030159999926884968
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.03189333279927572
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.2820213238398234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.27426133553187054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.29178667068481445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.2955519954363505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.28488532702128094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.29025065898895264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.1671733260154724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.1649333337942759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.16428800423940024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.1486186683177948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.14823466539382935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.15076266725858053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.15457600355148315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.1530080040295919
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.15628266334533691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.09531199932098389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.09194133679072063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.09269866347312927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.08095466593901317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.0791733314593633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.08668800195058186
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.08483733733495076
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.08553066849708557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.052255998055140175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.05017066498597463
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.05031466484069824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.03488533447186152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.02383466561635335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.021759999295075733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.13586666186650595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.13195199767748514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.14201600352923074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.14177599549293518
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.13710932930310568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.14645866552988687
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.09037333726882935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.08724266290664673
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.08770133058230083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.0751093327999115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.08333333333333333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.047295997540156044
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.04937066634496053
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.04455466568470001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.019962667177120846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.07965333263079326
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.07936533292134602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.0787306676308314
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.08296533425649007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.08031466603279114
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.048325334986050926
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.05074666440486908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.04818666477998098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.04826666911443075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.046581332882245384
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.02179199953873952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.05625066657861074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.05229333539803823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.05645333230495453
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.05454933146635691
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.03788266579310099
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.024175999065240223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.04785066843032837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.031093334158261616
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.03211733450492223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.03197333216667175
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.03922666609287262
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.028021333118279774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,0.9056746959686279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,0.9185492992401123
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,0.816858688990275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,0.5446773370107015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,0.548090656598409
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.47093868255615234
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,0.5174026489257812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,0.5189919869105021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.46246933937072754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.3315359950065613
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.3333119948705037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.29975465933481854
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.3296533425649007
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.3298133412996928
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.2939680020014445
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.2166666587193807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.21799999475479126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.20269866784413657
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.21723200877507529
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.21749866008758545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.20152533054351807
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,0.565445343653361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,0.5692373514175415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.5030346711476644
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.3394613265991211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.3408213456471761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.3073280056317647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.3309599955876668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.332533339659373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.29967466990152997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.21405333280563354
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.21543999512990317
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.19550400972366333
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.21196800470352173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.2132533391316732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.19364800055821738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.16685332854588827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.16671466827392578
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.15616533160209656
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.1660426656405131
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.16666666666666666
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.15477333466211954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.42386666933695477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.4252053499221802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.38119999567667645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.26152000824610394
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.26390933990478516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.23863466580708823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.2561226685841878
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.2584693431854248
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.23244265715281168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.15922133127848306
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.16055466731389365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.1481653352578481
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.15972266594568887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.16081600387891135
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.14612266421318054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.14084266622861227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.14192533493041992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.1341919998327891
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.14044800400733948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.14205333590507507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.1321440041065216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,0.5319946606953939
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,0.5356906652450562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.4734026590983073
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.3109333316485087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.3142613371213277
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.282096008459727
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.30165332555770874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.3023093342781067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.27321600914001465
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.1893440087636312
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.19178666671117148
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.17717333634694418
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.18766399224599203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.18899200359980264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.17203199863433838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.12267733613650005
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.12404800454775493
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.11556266744931538
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.12344533205032349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.1239306628704071
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.11356266339619954
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.11607999602953593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.11867733796437581
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.11029866337776184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.11813867092132568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.1176639993985494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.10810133814811707
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.34463465213775635
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.3471733331680298
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.3115573326746623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.20624534289042154
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.2068586746851603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.18954133987426758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.19634133577346802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.19860267639160156
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.18153067429860434
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.1279146671295166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.1285920043786367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.11966400345166524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.12574399511019388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.127018670241038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.11575999855995178
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.09544000029563904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.09724266330401103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.09539733330408733
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.09737066427866618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.09087466200192769
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.09266666571299235
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.09329600135485332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.08692266543706258
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.3542399803797404
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.3563786745071411
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.31989866495132446
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.20775467157363892
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.20998932917912802
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.19059733549753824
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.1968053380648295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.19634666045506796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.18028799692789713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.12018133203188579
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.12154133121172588
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.11412266890207927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.11755733688672383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.11825600266456604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.10756267110506694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.07274666428565979
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.07682133217652638
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.07054399947325389
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.07071466743946075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.07030933101971944
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.06617600222428639
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.06643733382225037
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.06201066573460897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.06651199857393901
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.06229866544405619
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.23483200867970785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.2361546754837036
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.21303999423980713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.13966400424639383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.14105600118637085
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.13177067041397095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.12989333271980286
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.12995200355847678
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.12138133247693379
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.08270399769147237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.08514666557312012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.08029866715272267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.08288000027338664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.07482666770617168
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.060831998785336815
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.0602400004863739
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.05676266551017761
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.05713599920272827
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.05442133545875549
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.05418133238951365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.2584960063298543
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.25893867015838623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.23931199312210083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.15450666348139444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.1556053360303243
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.1432266632715861
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.13849600156148276
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.13928000132242838
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.1323360006014506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.08694932858149211
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.08785067001978557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.08514666557312012
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.08178666730721791
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.07749866445859273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.0554720014333725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.05234666665395101
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.04359999795754751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.040149333576361336
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.04203199843565623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.1808799902598063
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.1814240018526713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.16436266899108887
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.10564266641934712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.10762666662534077
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.10152000188827515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.0969599982102712
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.09878399968147278
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.09222400188446045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.06258133550484975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.06426133215427399
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.061893333991368614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.3376266558965047
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.06513600051403046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.18107734123865762
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.17871999740600586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.17686933279037476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.1086293359597524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.10775466759999593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.10392000277837117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.09955199559529622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.09836266438166301
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.09886933366457622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.06225599845250448
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.061946665247281395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.059477334221204124
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.03910933434963226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.036992001036802925
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.03192000091075897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.15005866686503092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.15710399548212686
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.09409067034721375
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.091648002465566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.09364266196886699
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.08349866668383281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.051829333106676735
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.05318933228651682
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.050245334704717
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.048197334011395775
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.03181866556406021
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.02842666705449422
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.02179199953873952
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.07895466685295105
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.08291199803352356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.046021332343419395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.030576000610987347
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.030405332644780476
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.022837333381175995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.335749348004659
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.3267359932263692
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.2967733343442281
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.21403199434280396
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.21419199307759604
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.1959893306096395
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.2055093248685201
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.20580265919367471
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.1897546648979187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.2165706753730774
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.21714667479197183
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.19381332397460938
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.16338133811950684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.16475199659665427
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.15169599652290344
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.1588320036729177
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.1588213344415029
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.1462613344192505
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.15666666626930237
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.15760533014933267
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.14447466532389322
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.14009599884351095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.12896000345547995
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.13456533352533975
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.1362826625506083
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.12622400124867758
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.20000000794728598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.19878933827082315
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.1771893302599589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.1216159959634145
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.12361600001653035
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.11377599835395813
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.11389333009719849
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.11524266997973125
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.11140799522399902
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.11339199542999268
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.12636799613634744
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.12769066294034323
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.11751466989517212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.09493333101272583
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.09524266918500264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.08915199836095174
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.09108266234397888
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.08750399947166443
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.08910933136940002
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.08264000217119853
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.12355732917785645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.12546666463216147
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.11538133025169373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.07668266693751018
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.07355199754238129
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.07052266597747803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.0706826647122701
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.0664160003264745
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.0664213349421819
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.06452799836794536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.08494399984677632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.08640533685684204
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.07960000137488048
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.06223999957243601
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.06217066446940104
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.05624533196290334
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.0569706658522288
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.054560000697771706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.05053866902987162
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.053397332628568016
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.05232533315817515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.04830400149027506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.08865599830945332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.08943999807039897
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.08497599760691325
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.05629333357016245
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.052485331892967224
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.046096002062161766
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.04393066465854645
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.06452799836794536
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.06433600187301636
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.037791999677817024
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03602666656176249
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.03314133236805598
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.06128533184528351
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.06113600234190623
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.06070933242638906
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.02629866699377696
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.051541333397229515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.05198400219281515
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.05345066885153452
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.030506665507952373
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.21040000518163046
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA GB200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.032314665615558624
